diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2008-10-23 09:50:12 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2008-10-23 09:50:12 -0700 |
commit | 3e5cce627cfaa35410ab77c50641c52b7096d3b5 (patch) | |
tree | 74d039845f6d532a5d7f9623c92469944809a19f | |
parent | f2e4bd2b37bf072babc6a1f6c2a7ef53b7b046ad (diff) | |
parent | 51157b4ab47e1376c2b93cb854b14b637efeaff2 (diff) |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/agk/linux-2.6-dm
* git://git.kernel.org/pub/scm/linux/kernel/git/agk/linux-2.6-dm:
dm: tidy local_init
dm: remove unused flush_all
dm raid1: separate region_hash interface part1
dm: mark split bio as cloned
dm crypt: remove waitqueue
dm crypt: fix async split
dm crypt: tidy sector
dm: remove dm header from targets
dm: publish array_too_big
dm exception store: fix misordered writes
dm exception store: refactor zero_area
dm snapshot: drop unused last_percent
dm snapshot: fix primary_pe race
dm kcopyd: avoid queue shuffle
-rw-r--r-- | drivers/md/Makefile | 2 | ||||
-rw-r--r-- | drivers/md/dm-crypt.c | 56 | ||||
-rw-r--r-- | drivers/md/dm-delay.c | 3 | ||||
-rw-r--r-- | drivers/md/dm-exception-store.c | 108 | ||||
-rw-r--r-- | drivers/md/dm-io.c | 2 | ||||
-rw-r--r-- | drivers/md/dm-kcopyd.c | 14 | ||||
-rw-r--r-- | drivers/md/dm-linear.c | 2 | ||||
-rw-r--r-- | drivers/md/dm-log.c | 2 | ||||
-rw-r--r-- | drivers/md/dm-mpath.c | 3 | ||||
-rw-r--r-- | drivers/md/dm-path-selector.c | 3 | ||||
-rw-r--r-- | drivers/md/dm-raid1.c | 791 | ||||
-rw-r--r-- | drivers/md/dm-region-hash.c | 704 | ||||
-rw-r--r-- | drivers/md/dm-round-robin.c | 3 | ||||
-rw-r--r-- | drivers/md/dm-snap.c | 11 | ||||
-rw-r--r-- | drivers/md/dm-snap.h | 5 | ||||
-rw-r--r-- | drivers/md/dm-stripe.c | 6 | ||||
-rw-r--r-- | drivers/md/dm-zero.c | 2 | ||||
-rw-r--r-- | drivers/md/dm.c | 49 | ||||
-rw-r--r-- | drivers/md/dm.h | 9 | ||||
-rw-r--r-- | include/linux/device-mapper.h | 3 | ||||
-rw-r--r-- | include/linux/dm-region-hash.h | 104 |
21 files changed, 1084 insertions, 798 deletions
diff --git a/drivers/md/Makefile b/drivers/md/Makefile index f1ef33dfd8c..1c615804ea7 100644 --- a/drivers/md/Makefile +++ b/drivers/md/Makefile @@ -34,7 +34,7 @@ obj-$(CONFIG_DM_CRYPT) += dm-crypt.o obj-$(CONFIG_DM_DELAY) += dm-delay.o obj-$(CONFIG_DM_MULTIPATH) += dm-multipath.o dm-round-robin.o obj-$(CONFIG_DM_SNAPSHOT) += dm-snapshot.o -obj-$(CONFIG_DM_MIRROR) += dm-mirror.o dm-log.o +obj-$(CONFIG_DM_MIRROR) += dm-mirror.o dm-log.o dm-region-hash.o obj-$(CONFIG_DM_ZERO) += dm-zero.o quiet_cmd_unroll = UNROLL $@ diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 682ef9e6acd..ce26c84af06 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -23,7 +23,7 @@ #include <asm/page.h> #include <asm/unaligned.h> -#include "dm.h" +#include <linux/device-mapper.h> #define DM_MSG_PREFIX "crypt" #define MESG_STR(x) x, sizeof(x) @@ -56,6 +56,7 @@ struct dm_crypt_io { atomic_t pending; int error; sector_t sector; + struct dm_crypt_io *base_io; }; struct dm_crypt_request { @@ -93,7 +94,6 @@ struct crypt_config { struct workqueue_struct *io_queue; struct workqueue_struct *crypt_queue; - wait_queue_head_t writeq; /* * crypto related data @@ -534,6 +534,7 @@ static struct dm_crypt_io *crypt_io_alloc(struct dm_target *ti, io->base_bio = bio; io->sector = sector; io->error = 0; + io->base_io = NULL; atomic_set(&io->pending, 0); return io; @@ -547,6 +548,7 @@ static void crypt_inc_pending(struct dm_crypt_io *io) /* * One of the bios was finished. Check for completion of * the whole request and correctly clean up the buffer. + * If base_io is set, wait for the last fragment to complete. 
*/ static void crypt_dec_pending(struct dm_crypt_io *io) { @@ -555,7 +557,14 @@ static void crypt_dec_pending(struct dm_crypt_io *io) if (!atomic_dec_and_test(&io->pending)) return; - bio_endio(io->base_bio, io->error); + if (likely(!io->base_io)) + bio_endio(io->base_bio, io->error); + else { + if (io->error && !io->base_io->error) + io->base_io->error = io->error; + crypt_dec_pending(io->base_io); + } + mempool_free(io, cc->io_pool); } @@ -646,10 +655,7 @@ static void kcryptd_io_read(struct dm_crypt_io *io) static void kcryptd_io_write(struct dm_crypt_io *io) { struct bio *clone = io->ctx.bio_out; - struct crypt_config *cc = io->target->private; - generic_make_request(clone); - wake_up(&cc->writeq); } static void kcryptd_io(struct work_struct *work) @@ -688,7 +694,6 @@ static void kcryptd_crypt_write_io_submit(struct dm_crypt_io *io, BUG_ON(io->ctx.idx_out < clone->bi_vcnt); clone->bi_sector = cc->start + io->sector; - io->sector += bio_sectors(clone); if (async) kcryptd_queue_io(io); @@ -700,16 +705,18 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io) { struct crypt_config *cc = io->target->private; struct bio *clone; + struct dm_crypt_io *new_io; int crypt_finished; unsigned out_of_pages = 0; unsigned remaining = io->base_bio->bi_size; + sector_t sector = io->sector; int r; /* * Prevent io from disappearing until this function completes. 
*/ crypt_inc_pending(io); - crypt_convert_init(cc, &io->ctx, NULL, io->base_bio, io->sector); + crypt_convert_init(cc, &io->ctx, NULL, io->base_bio, sector); /* * The allocated buffers can be smaller than the whole bio, @@ -726,6 +733,7 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io) io->ctx.idx_out = 0; remaining -= clone->bi_size; + sector += bio_sectors(clone); crypt_inc_pending(io); r = crypt_convert(cc, &io->ctx); @@ -741,6 +749,8 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io) */ if (unlikely(r < 0)) break; + + io->sector = sector; } /* @@ -750,8 +760,33 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io) if (unlikely(out_of_pages)) congestion_wait(WRITE, HZ/100); - if (unlikely(remaining)) - wait_event(cc->writeq, !atomic_read(&io->ctx.pending)); + /* + * With async crypto it is unsafe to share the crypto context + * between fragments, so switch to a new dm_crypt_io structure. + */ + if (unlikely(!crypt_finished && remaining)) { + new_io = crypt_io_alloc(io->target, io->base_bio, + sector); + crypt_inc_pending(new_io); + crypt_convert_init(cc, &new_io->ctx, NULL, + io->base_bio, sector); + new_io->ctx.idx_in = io->ctx.idx_in; + new_io->ctx.offset_in = io->ctx.offset_in; + + /* + * Fragments after the first use the base_io + * pending count. 
+ */ + if (!io->base_io) + new_io->base_io = io; + else { + new_io->base_io = io->base_io; + crypt_inc_pending(io->base_io); + crypt_dec_pending(io); + } + + io = new_io; + } } crypt_dec_pending(io); @@ -1078,7 +1113,6 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad_crypt_queue; } - init_waitqueue_head(&cc->writeq); ti->private = cc; return 0; diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c index bdd37f881c4..848b381f117 100644 --- a/drivers/md/dm-delay.c +++ b/drivers/md/dm-delay.c @@ -13,7 +13,8 @@ #include <linux/bio.h> #include <linux/slab.h> -#include "dm.h" +#include <linux/device-mapper.h> + #include "dm-bio-list.h" #define DM_MSG_PREFIX "delay" diff --git a/drivers/md/dm-exception-store.c b/drivers/md/dm-exception-store.c index 769ab677f8e..01590f3e000 100644 --- a/drivers/md/dm-exception-store.c +++ b/drivers/md/dm-exception-store.c @@ -7,7 +7,6 @@ * This file is released under the GPL. */ -#include "dm.h" #include "dm-snap.h" #include <linux/mm.h> @@ -105,6 +104,11 @@ struct pstore { void *area; /* + * An area of zeros used to clear the next area. + */ + void *zero_area; + + /* * Used to keep track of which metadata area the data in * 'chunk' refers to. */ @@ -149,6 +153,13 @@ static int alloc_area(struct pstore *ps) if (!ps->area) return r; + ps->zero_area = vmalloc(len); + if (!ps->zero_area) { + vfree(ps->area); + return r; + } + memset(ps->zero_area, 0, len); + return 0; } @@ -156,6 +167,8 @@ static void free_area(struct pstore *ps) { vfree(ps->area); ps->area = NULL; + vfree(ps->zero_area); + ps->zero_area = NULL; } struct mdata_req { @@ -220,25 +233,41 @@ static chunk_t area_location(struct pstore *ps, chunk_t area) * Read or write a metadata area. Remembering to skip the first * chunk which holds the header. 
*/ -static int area_io(struct pstore *ps, chunk_t area, int rw) +static int area_io(struct pstore *ps, int rw) { int r; chunk_t chunk; - chunk = area_location(ps, area); + chunk = area_location(ps, ps->current_area); r = chunk_io(ps, chunk, rw, 0); if (r) return r; - ps->current_area = area; return 0; } -static int zero_area(struct pstore *ps, chunk_t area) +static void zero_memory_area(struct pstore *ps) { memset(ps->area, 0, ps->snap->chunk_size << SECTOR_SHIFT); - return area_io(ps, area, WRITE); +} + +static int zero_disk_area(struct pstore *ps, chunk_t area) +{ + struct dm_io_region where = { + .bdev = ps->snap->cow->bdev, + .sector = ps->snap->chunk_size * area_location(ps, area), + .count = ps->snap->chunk_size, + }; + struct dm_io_request io_req = { + .bi_rw = WRITE, + .mem.type = DM_IO_VMA, + .mem.ptr.vma = ps->zero_area, + .client = ps->io_client, + .notify.fn = NULL, + }; + + return dm_io(&io_req, 1, &where, NULL); } static int read_header(struct pstore *ps, int *new_snapshot) @@ -411,15 +440,14 @@ static int insert_exceptions(struct pstore *ps, int *full) static int read_exceptions(struct pstore *ps) { - chunk_t area; int r, full = 1; /* * Keeping reading chunks and inserting exceptions until * we find a partially full area. */ - for (area = 0; full; area++) { - r = area_io(ps, area, READ); + for (ps->current_area = 0; full; ps->current_area++) { + r = area_io(ps, READ); if (r) return r; @@ -428,6 +456,8 @@ static int read_exceptions(struct pstore *ps) return r; } + ps->current_area--; + return 0; } @@ -486,12 +516,13 @@ static int persistent_read_metadata(struct exception_store *store) return r; } - r = zero_area(ps, 0); + ps->current_area = 0; + zero_memory_area(ps); + r = zero_disk_area(ps, 0); if (r) { - DMWARN("zero_area(0) failed"); + DMWARN("zero_disk_area(0) failed"); return r; } - } else { /* * Sanity checks. 
@@ -551,7 +582,6 @@ static void persistent_commit(struct exception_store *store, void (*callback) (void *, int success), void *callback_context) { - int r; unsigned int i; struct pstore *ps = get_info(store); struct disk_exception de; @@ -572,33 +602,41 @@ static void persistent_commit(struct exception_store *store, cb->context = callback_context; /* - * If there are no more exceptions in flight, or we have - * filled this metadata area we commit the exceptions to - * disk. + * If there are exceptions in flight and we have not yet + * filled this metadata area there's nothing more to do. */ - if (atomic_dec_and_test(&ps->pending_count) || - (ps->current_committed == ps->exceptions_per_area)) { - r = area_io(ps, ps->current_area, WRITE); - if (r) - ps->valid = 0; + if (!atomic_dec_and_test(&ps->pending_count) && + (ps->current_committed != ps->exceptions_per_area)) + return; - /* - * Have we completely filled the current area ? - */ - if (ps->current_committed == ps->exceptions_per_area) { - ps->current_committed = 0; - r = zero_area(ps, ps->current_area + 1); - if (r) - ps->valid = 0; - } + /* + * If we completely filled the current area, then wipe the next one. + */ + if ((ps->current_committed == ps->exceptions_per_area) && + zero_disk_area(ps, ps->current_area + 1)) + ps->valid = 0; - for (i = 0; i < ps->callback_count; i++) { - cb = ps->callbacks + i; - cb->callback(cb->context, r == 0 ? 1 : 0); - } + /* + * Commit exceptions to disk. + */ + if (ps->valid && area_io(ps, WRITE)) + ps->valid = 0; - ps->callback_count = 0; + /* + * Advance to the next area if this one is full. 
+ */ + if (ps->current_committed == ps->exceptions_per_area) { + ps->current_committed = 0; + ps->current_area++; + zero_memory_area(ps); } + + for (i = 0; i < ps->callback_count; i++) { + cb = ps->callbacks + i; + cb->callback(cb->context, ps->valid); + } + + ps->callback_count = 0; } static void persistent_drop(struct exception_store *store) diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c index 4789c42d9a3..2fd6d445063 100644 --- a/drivers/md/dm-io.c +++ b/drivers/md/dm-io.c @@ -5,7 +5,7 @@ * This file is released under the GPL. */ -#include "dm.h" +#include <linux/device-mapper.h> #include <linux/bio.h> #include <linux/mempool.h> diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c index 996802b8a45..3073618269e 100644 --- a/drivers/md/dm-kcopyd.c +++ b/drivers/md/dm-kcopyd.c @@ -22,6 +22,7 @@ #include <linux/vmalloc.h> #include <linux/workqueue.h> #include <linux/mutex.h> +#include <linux/device-mapper.h> #include <linux/dm-kcopyd.h> #include "dm.h" @@ -268,6 +269,17 @@ static void push(struct list_head *jobs, struct kcopyd_job *job) spin_unlock_irqrestore(&kc->job_lock, flags); } + +static void push_head(struct list_head *jobs, struct kcopyd_job *job) +{ + unsigned long flags; + struct dm_kcopyd_client *kc = job->kc; + + spin_lock_irqsave(&kc->job_lock, flags); + list_add(&job->list, jobs); + spin_unlock_irqrestore(&kc->job_lock, flags); +} + /* * These three functions process 1 item from the corresponding * job list. @@ -398,7 +410,7 @@ static int process_jobs(struct list_head *jobs, struct dm_kcopyd_client *kc, * We couldn't service this job ATM, so * push this job back onto the list. 
*/ - push(jobs, job); + push_head(jobs, job); break; } diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c index 6449bcdf84c..1b29e913675 100644 --- a/drivers/md/dm-linear.c +++ b/drivers/md/dm-linear.c @@ -5,12 +5,12 @@ */ #include "dm.h" - #include <linux/module.h> #include <linux/init.h> #include <linux/blkdev.h> #include <linux/bio.h> #include <linux/slab.h> +#include <linux/device-mapper.h> #define DM_MSG_PREFIX "linear" diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c index 5b48478c79f..a8c0fc79ca7 100644 --- a/drivers/md/dm-log.c +++ b/drivers/md/dm-log.c @@ -12,7 +12,7 @@ #include <linux/dm-io.h> #include <linux/dm-dirty-log.h> -#include "dm.h" +#include <linux/device-mapper.h> #define DM_MSG_PREFIX "dirty region log" diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 9bf3460c554..abf6e8cfaed 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -5,7 +5,8 @@ * This file is released under the GPL. */ -#include "dm.h" +#include <linux/device-mapper.h> + #include "dm-path-selector.h" #include "dm-bio-list.h" #include "dm-bio-record.h" diff --git a/drivers/md/dm-path-selector.c b/drivers/md/dm-path-selector.c index ca1bb636a3e..96ea226155b 100644 --- a/drivers/md/dm-path-selector.c +++ b/drivers/md/dm-path-selector.c @@ -9,7 +9,8 @@ * Path selector registration. */ -#include "dm.h" +#include <linux/device-mapper.h> + #include "dm-path-selector.h" #include <linux/slab.h> diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index 29913e42c4a..92dcc06832a 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -1,30 +1,30 @@ /* * Copyright (C) 2003 Sistina Software Limited. + * Copyright (C) 2005-2008 Red Hat, Inc. All rights reserved. * * This file is released under the GPL. 
*/ -#include "dm.h" #include "dm-bio-list.h" #include "dm-bio-record.h" -#include <linux/ctype.h> #include <linux/init.h> #include <linux/mempool.h> #include <linux/module.h> #include <linux/pagemap.h> #include <linux/slab.h> -#include <linux/time.h> -#include <linux/vmalloc.h> #include <linux/workqueue.h> -#include <linux/log2.h> -#include <linux/hardirq.h> +#include <linux/device-mapper.h> #include <linux/dm-io.h> #include <linux/dm-dirty-log.h> #include <linux/dm-kcopyd.h> +#include <linux/dm-region-hash.h> #define DM_MSG_PREFIX "raid1" + +#define MAX_RECOVERY 1 /* Maximum number of regions recovered in parallel. */ #define DM_IO_PAGES 64 +#define DM_KCOPYD_PAGES 64 #define DM_RAID1_HANDLE_ERRORS 0x01 #define errors_handled(p) ((p)->features & DM_RAID1_HANDLE_ERRORS) @@ -32,87 +32,6 @@ static DECLARE_WAIT_QUEUE_HEAD(_kmirrord_recovery_stopped); /*----------------------------------------------------------------- - * Region hash - * - * The mirror splits itself up into discrete regions. Each - * region can be in one of three states: clean, dirty, - * nosync. There is no need to put clean regions in the hash. - * - * In addition to being present in the hash table a region _may_ - * be present on one of three lists. - * - * clean_regions: Regions on this list have no io pending to - * them, they are in sync, we are no longer interested in them, - * they are dull. rh_update_states() will remove them from the - * hash table. - * - * quiesced_regions: These regions have been spun down, ready - * for recovery. rh_recovery_start() will remove regions from - * this list and hand them to kmirrord, which will schedule the - * recovery io with kcopyd. - * - * recovered_regions: Regions that kcopyd has successfully - * recovered. rh_update_states() will now schedule any delayed - * io, up the recovery_count, and remove the region from the - * hash. 
- * - * There are 2 locks: - * A rw spin lock 'hash_lock' protects just the hash table, - * this is never held in write mode from interrupt context, - * which I believe means that we only have to disable irqs when - * doing a write lock. - * - * An ordinary spin lock 'region_lock' that protects the three - * lists in the region_hash, with the 'state', 'list' and - * 'bhs_delayed' fields of the regions. This is used from irq - * context, so all other uses will have to suspend local irqs. - *---------------------------------------------------------------*/ -struct mirror_set; -struct region_hash { - struct mirror_set *ms; - uint32_t region_size; - unsigned region_shift; - - /* holds persistent region state */ - struct dm_dirty_log *log; - - /* hash table */ - rwlock_t hash_lock; - mempool_t *region_pool; - unsigned int mask; - unsigned int nr_buckets; - struct list_head *buckets; - - spinlock_t region_lock; - atomic_t recovery_in_flight; - struct semaphore recovery_count; - struct list_head clean_regions; - struct list_head quiesced_regions; - struct list_head recovered_regions; - struct list_head failed_recovered_regions; -}; - -enum { - RH_CLEAN, - RH_DIRTY, - RH_NOSYNC, - RH_RECOVERING -}; - -struct region { - struct region_hash *rh; /* FIXME: can we get rid of this ? */ - region_t key; - int state; - - struct list_head hash_list; - struct list_head list; - - atomic_t pending; - struct bio_list delayed_bios; -}; - - -/*----------------------------------------------------------------- * Mirror set structures. 
*---------------------------------------------------------------*/ enum dm_raid1_error { @@ -132,8 +51,7 @@ struct mirror { struct mirror_set { struct dm_target *ti; struct list_head list; - struct region_hash rh; - struct dm_kcopyd_client *kcopyd_client; + uint64_t features; spinlock_t lock; /* protects the lists */ @@ -141,6 +59,8 @@ struct mirror_set { struct bio_list writes; struct bio_list failures; + struct dm_region_hash *rh; + struct dm_kcopyd_client *kcopyd_client; struct dm_io_client *io_client; mempool_t *read_record_pool; @@ -159,25 +79,14 @@ struct mirror_set { struct work_struct trigger_event; - unsigned int nr_mirrors; + unsigned nr_mirrors; struct mirror mirror[0]; }; -/* - * Conversion fns - */ -static inline region_t bio_to_region(struct region_hash *rh, struct bio *bio) -{ - return (bio->bi_sector - rh->ms->ti->begin) >> rh->region_shift; -} - -static inline sector_t region_to_sector(struct region_hash *rh, region_t region) +static void wakeup_mirrord(void *context) { - return region << rh->region_shift; -} + struct mirror_set *ms = context; -static void wake(struct mirror_set *ms) -{ queue_work(ms->kmirrord_wq, &ms->kmirrord_work); } @@ -186,7 +95,7 @@ static void delayed_wake_fn(unsigned long data) struct mirror_set *ms = (struct mirror_set *) data; clear_bit(0, &ms->timer_pending); - wake(ms); + wakeup_mirrord(ms); } static void delayed_wake(struct mirror_set *ms) @@ -200,473 +109,34 @@ static void delayed_wake(struct mirror_set *ms) add_timer(&ms->timer); } -/* FIXME move this */ -static void queue_bio(struct mirror_set *ms, struct bio *bio, int rw); - -#define MIN_REGIONS 64 -#define MAX_RECOVERY 1 -static int rh_init(struct region_hash *rh, struct mirror_set *ms, - struct dm_dirty_log *log, uint32_t region_size, - region_t nr_regions) +static void wakeup_all_recovery_waiters(void *context) { - unsigned int nr_buckets, max_buckets; - size_t i; - - /* - * Calculate a suitable number of buckets for our hash - * table. 
- */ - max_buckets = nr_regions >> 6; - for (nr_buckets = 128u; nr_buckets < max_buckets; nr_buckets <<= 1) - ; - nr_buckets >>= 1; - - rh->ms = ms; - rh->log = log; - rh->region_size = region_size; - rh->region_shift = ffs(region_size) - 1; - rwlock_init(&rh->hash_lock); - rh->mask = nr_buckets - 1; - rh->nr_buckets = nr_buckets; - - rh->buckets = vmalloc(nr_buckets * sizeof(*rh->buckets)); - if (!rh->buckets) { - DMERR("unable to allocate region hash memory"); - return -ENOMEM; - } - - for (i = 0; i < nr_buckets; i++) - INIT_LIST_HEAD(rh->buckets + i); - - spin_lock_init(&rh->region_lock); - sema_init(&rh->recovery_count, 0); - atomic_set(&rh->recovery_in_flight, 0); - INIT_LIST_HEAD(&rh->clean_regions); - INIT_LIST_HEAD(&rh->quiesced_regions); - INIT_LIST_HEAD(&rh->recovered_regions); - INIT_LIST_HEAD(&rh->failed_recovered_regions); - - rh->region_pool = mempool_create_kmalloc_pool(MIN_REGIONS, - sizeof(struct region)); - if (!rh->region_pool) { - vfree(rh->buckets); - rh->buckets = NULL; - return -ENOMEM; - } - - return 0; + wake_up_all(&_kmirrord_recovery_stopped); } -static void rh_exit(struct region_hash *rh) -{ - unsigned int h; - struct region *reg, *nreg; - - BUG_ON(!list_empty(&rh->quiesced_regions)); - for (h = 0; h < rh->nr_buckets; h++) { - list_for_each_entry_safe(reg, nreg, rh->buckets + h, hash_list) { - BUG_ON(atomic_read(&reg->pending)); - mempool_free(reg, rh->region_pool); - } - } - - if (rh->log) - dm_dirty_log_destroy(rh->log); - if (rh->region_pool) - mempool_destroy(rh->region_pool); - vfree(rh->buckets); -} - -#define RH_HASH_MULT 2654435387U - -static inline unsigned int rh_hash(struct region_hash *rh, region_t region) -{ - return (unsigned int) ((region * RH_HASH_MULT) >> 12) & rh->mask; -} - -static struct region *__rh_lookup(struct region_hash *rh, region_t region) -{ - struct region *reg; - - list_for_each_entry (reg, rh->buckets + rh_hash(rh, region), hash_list) - if (reg->key == region) - return reg; - - return NULL; -} - -static void 
__rh_insert(struct region_hash *rh, struct region *reg) -{ - unsigned int h = rh_hash(rh, reg->key); - list_add(&reg->hash_list, rh->buckets + h); -} - -static struct region *__rh_alloc(struct region_hash *rh, region_t region) -{ - struct region *reg, *nreg; - - read_unlock(&rh->hash_lock); - nreg = mempool_alloc(rh->region_pool, GFP_ATOMIC); - if (unlikely(!nreg)) - nreg = kmalloc(sizeof(struct region), GFP_NOIO); - nreg->state = rh->log->type->in_sync(rh->log, region, 1) ? - RH_CLEAN : RH_NOSYNC; - nreg->rh = rh; - nreg->key = region; - - INIT_LIST_HEAD(&nreg->list); - - atomic_set(&nreg->pending, 0); - bio_list_init(&nreg->delayed_bios); - write_lock_irq(&rh->hash_lock); - - reg = __rh_lookup(rh, region); - if (reg) - /* we lost the race */ - mempool_free(nreg, rh->region_pool); - - else { - __rh_insert(rh, nreg); - if (nreg->state == RH_CLEAN) { - spin_lock(&rh->region_lock); - list_add(&nreg->list, &rh->clean_regions); - spin_unlock(&rh->region_lock); - } - reg = nreg; - } - write_unlock_irq(&rh->hash_lock); - read_lock(&rh->hash_lock); - - return reg; -} - -static inline struct region *__rh_find(struct region_hash *rh, region_t region) -{ - struct region *reg; - - reg = __rh_lookup(rh, region); - if (!reg) - reg = __rh_alloc(rh, region); - - return reg; -} - -static int rh_state(struct region_hash *rh, region_t region, int may_block) -{ - int r; - struct region *reg; - - read_lock(&rh->hash_lock); - reg = __rh_lookup(rh, region); - read_unlock(&rh->hash_lock); - - if (reg) - return reg->state; - - /* - * The region wasn't in the hash, so we fall back to the - * dirty log. - */ - r = rh->log->type->in_sync(rh->log, region, may_block); - - /* - * Any error from the dirty log (eg. -EWOULDBLOCK) gets - * taken as a RH_NOSYNC - */ - return r == 1 ? 
RH_CLEAN : RH_NOSYNC; -} - -static inline int rh_in_sync(struct region_hash *rh, - region_t region, int may_block) -{ - int state = rh_state(rh, region, may_block); - return state == RH_CLEAN || state == RH_DIRTY; -} - -static void dispatch_bios(struct mirror_set *ms, struct bio_list *bio_list) -{ - struct bio *bio; - - while ((bio = bio_list_pop(bio_list))) { - queue_bio(ms, bio, WRITE); - } -} - -static void complete_resync_work(struct region *reg, int success) -{ - struct region_hash *rh = reg->rh; - - rh->log->type->set_region_sync(rh->log, reg->key, success); - - /* - * Dispatch the bios before we call 'wake_up_all'. - * This is important because if we are suspending, - * we want to know that recovery is complete and - * the work queue is flushed. If we wake_up_all - * before we dispatch_bios (queue bios and call wake()), - * then we risk suspending before the work queue - * has been properly flushed. - */ - dispatch_bios(rh->ms, &reg->delayed_bios); - if (atomic_dec_and_test(&rh->recovery_in_flight)) - wake_up_all(&_kmirrord_recovery_stopped); - up(&rh->recovery_count); -} - -static void rh_update_states(struct region_hash *rh) -{ - struct region *reg, *next; - - LIST_HEAD(clean); - LIST_HEAD(recovered); - LIST_HEAD(failed_recovered); - - /* - * Quickly grab the lists. 
- */ - write_lock_irq(&rh->hash_lock); - spin_lock(&rh->region_lock); - if (!list_empty(&rh->clean_regions)) { - list_splice_init(&rh->clean_regions, &clean); - - list_for_each_entry(reg, &clean, list) - list_del(&reg->hash_list); - } - - if (!list_empty(&rh->recovered_regions)) { - list_splice_init(&rh->recovered_regions, &recovered); - - list_for_each_entry (reg, &recovered, list) - list_del(&reg->hash_list); - } - - if (!list_empty(&rh->failed_recovered_regions)) { - list_splice_init(&rh->failed_recovered_regions, - &failed_recovered); - - list_for_each_entry(reg, &failed_recovered, list) - list_del(&reg->hash_list); - } - - spin_unlock(&rh->region_lock); - write_unlock_irq(&rh->hash_lock); - - /* - * All the regions on the recovered and clean lists have - * now been pulled out of the system, so no need to do - * any more locking. - */ - list_for_each_entry_safe (reg, next, &recovered, list) { - rh->log->type->clear_region(rh->log, reg->key); - complete_resync_work(reg, 1); - mempool_free(reg, rh->region_pool); - } - - list_for_each_entry_safe(reg, next, &failed_recovered, list) { - complete_resync_work(reg, errors_handled(rh->ms) ? 0 : 1); - mempool_free(reg, rh->region_pool); - } - - list_for_each_entry_safe(reg, next, &clean, list) { - rh->log->type->clear_region(rh->log, reg->key); - mempool_free(reg, rh->region_pool); - } - - rh->log->type->flush(rh->log); -} - -static void rh_inc(struct region_hash *rh, region_t region) -{ |