diff options
author | Ingo Molnar <mingo@elte.hu> | 2009-04-29 14:46:59 +0200 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2009-04-29 14:47:05 +0200 |
commit | e7fd5d4b3d240f42c30a9e3d20a4689c4d3a795a (patch) | |
tree | 4ba588631dd8189a818a91c9e3976526071178b6 /drivers/md | |
parent | 1130b0296184bc21806225fd06d533515a99d2db (diff) | |
parent | 56a50adda49b2020156616c4eb15353e0f9ad7de (diff) |
Merge branch 'linus' into perfcounters/core
Merge reason: This branch was on -rc1, refresh it to almost-rc4 to pick up
the latest upstream fixes.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'drivers/md')
-rw-r--r-- | drivers/md/bitmap.c | 7 | ||||
-rw-r--r-- | drivers/md/dm-bio-list.h | 117 | ||||
-rw-r--r-- | drivers/md/dm-delay.c | 2 | ||||
-rw-r--r-- | drivers/md/dm-ioctl.c | 21 | ||||
-rw-r--r-- | drivers/md/dm-kcopyd.c | 23 | ||||
-rw-r--r-- | drivers/md/dm-linear.c | 1 | ||||
-rw-r--r-- | drivers/md/dm-mpath.c | 1 | ||||
-rw-r--r-- | drivers/md/dm-raid1.c | 1 | ||||
-rw-r--r-- | drivers/md/dm-region-hash.c | 1 | ||||
-rw-r--r-- | drivers/md/dm-snap.c | 1 | ||||
-rw-r--r-- | drivers/md/dm-table.c | 59 | ||||
-rw-r--r-- | drivers/md/dm.c | 200 | ||||
-rw-r--r-- | drivers/md/dm.h | 1 | ||||
-rw-r--r-- | drivers/md/md.c | 41 | ||||
-rw-r--r-- | drivers/md/md.h | 21 | ||||
-rw-r--r-- | drivers/md/raid1.c | 1 | ||||
-rw-r--r-- | drivers/md/raid10.c | 1 | ||||
-rw-r--r-- | drivers/md/raid5.c | 7 |
18 files changed, 269 insertions, 237 deletions
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index f8a9f7ab2cb..1fb91edc7de 100644 --- a/drivers/md/bitmap.c +++ b/drivers/md/bitmap.c @@ -1479,6 +1479,7 @@ void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector) s += blocks; } bitmap->last_end_sync = jiffies; + sysfs_notify(&bitmap->mddev->kobj, NULL, "sync_completed"); } static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed) @@ -1589,7 +1590,7 @@ void bitmap_destroy(mddev_t *mddev) int bitmap_create(mddev_t *mddev) { struct bitmap *bitmap; - unsigned long blocks = mddev->resync_max_sectors; + sector_t blocks = mddev->resync_max_sectors; unsigned long chunks; unsigned long pages; struct file *file = mddev->bitmap_file; @@ -1631,8 +1632,8 @@ int bitmap_create(mddev_t *mddev) bitmap->chunkshift = ffz(~bitmap->chunksize); /* now that chunksize and chunkshift are set, we can use these macros */ - chunks = (blocks + CHUNK_BLOCK_RATIO(bitmap) - 1) / - CHUNK_BLOCK_RATIO(bitmap); + chunks = (blocks + CHUNK_BLOCK_RATIO(bitmap) - 1) >> + CHUNK_BLOCK_SHIFT(bitmap); pages = (chunks + PAGE_COUNTER_RATIO - 1) / PAGE_COUNTER_RATIO; BUG_ON(!pages); diff --git a/drivers/md/dm-bio-list.h b/drivers/md/dm-bio-list.h deleted file mode 100644 index 345098b4ca7..00000000000 --- a/drivers/md/dm-bio-list.h +++ /dev/null @@ -1,117 +0,0 @@ -/* - * Copyright (C) 2004 Red Hat UK Ltd. - * - * This file is released under the GPL. 
- */ - -#ifndef DM_BIO_LIST_H -#define DM_BIO_LIST_H - -#include <linux/bio.h> - -#ifdef CONFIG_BLOCK - -struct bio_list { - struct bio *head; - struct bio *tail; -}; - -static inline int bio_list_empty(const struct bio_list *bl) -{ - return bl->head == NULL; -} - -static inline void bio_list_init(struct bio_list *bl) -{ - bl->head = bl->tail = NULL; -} - -#define bio_list_for_each(bio, bl) \ - for (bio = (bl)->head; bio; bio = bio->bi_next) - -static inline unsigned bio_list_size(const struct bio_list *bl) -{ - unsigned sz = 0; - struct bio *bio; - - bio_list_for_each(bio, bl) - sz++; - - return sz; -} - -static inline void bio_list_add(struct bio_list *bl, struct bio *bio) -{ - bio->bi_next = NULL; - - if (bl->tail) - bl->tail->bi_next = bio; - else - bl->head = bio; - - bl->tail = bio; -} - -static inline void bio_list_add_head(struct bio_list *bl, struct bio *bio) -{ - bio->bi_next = bl->head; - - bl->head = bio; - - if (!bl->tail) - bl->tail = bio; -} - -static inline void bio_list_merge(struct bio_list *bl, struct bio_list *bl2) -{ - if (!bl2->head) - return; - - if (bl->tail) - bl->tail->bi_next = bl2->head; - else - bl->head = bl2->head; - - bl->tail = bl2->tail; -} - -static inline void bio_list_merge_head(struct bio_list *bl, - struct bio_list *bl2) -{ - if (!bl2->head) - return; - - if (bl->head) - bl2->tail->bi_next = bl->head; - else - bl->tail = bl2->tail; - - bl->head = bl2->head; -} - -static inline struct bio *bio_list_pop(struct bio_list *bl) -{ - struct bio *bio = bl->head; - - if (bio) { - bl->head = bl->head->bi_next; - if (!bl->head) - bl->tail = NULL; - - bio->bi_next = NULL; - } - - return bio; -} - -static inline struct bio *bio_list_get(struct bio_list *bl) -{ - struct bio *bio = bl->head; - - bl->head = bl->tail = NULL; - - return bio; -} - -#endif /* CONFIG_BLOCK */ -#endif diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c index 59ee1b015d2..559dbb52bc8 100644 --- a/drivers/md/dm-delay.c +++ b/drivers/md/dm-delay.c @@ -15,8 
+15,6 @@ #include <linux/device-mapper.h> -#include "dm-bio-list.h" - #define DM_MSG_PREFIX "delay" struct delay_c { diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index f01096549a9..823ceba6efa 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -1047,6 +1047,19 @@ static int populate_table(struct dm_table *table, return dm_table_complete(table); } +static int table_prealloc_integrity(struct dm_table *t, + struct mapped_device *md) +{ + struct list_head *devices = dm_table_get_devices(t); + struct dm_dev_internal *dd; + + list_for_each_entry(dd, devices, list) + if (bdev_get_integrity(dd->dm_dev.bdev)) + return blk_integrity_register(dm_disk(md), NULL); + + return 0; +} + static int table_load(struct dm_ioctl *param, size_t param_size) { int r; @@ -1068,6 +1081,14 @@ static int table_load(struct dm_ioctl *param, size_t param_size) goto out; } + r = table_prealloc_integrity(t, md); + if (r) { + DMERR("%s: could not register integrity profile.", + dm_device_name(md)); + dm_table_destroy(t); + goto out; + } + down_write(&_hash_lock); hc = dm_get_mdptr(md); if (!hc || hc->md != md) { diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c index 0a225da2127..3e3fc06cb86 100644 --- a/drivers/md/dm-kcopyd.c +++ b/drivers/md/dm-kcopyd.c @@ -297,7 +297,8 @@ static int run_complete_job(struct kcopyd_job *job) dm_kcopyd_notify_fn fn = job->fn; struct dm_kcopyd_client *kc = job->kc; - kcopyd_put_pages(kc, job->pages); + if (job->pages) + kcopyd_put_pages(kc, job->pages); mempool_free(job, kc->job_pool); fn(read_err, write_err, context); @@ -461,6 +462,7 @@ static void segment_complete(int read_err, unsigned long write_err, sector_t progress = 0; sector_t count = 0; struct kcopyd_job *job = (struct kcopyd_job *) context; + struct dm_kcopyd_client *kc = job->kc; mutex_lock(&job->lock); @@ -490,7 +492,7 @@ static void segment_complete(int read_err, unsigned long write_err, if (count) { int i; - struct kcopyd_job *sub_job = 
mempool_alloc(job->kc->job_pool, + struct kcopyd_job *sub_job = mempool_alloc(kc->job_pool, GFP_NOIO); *sub_job = *job; @@ -509,13 +511,16 @@ static void segment_complete(int read_err, unsigned long write_err, } else if (atomic_dec_and_test(&job->sub_jobs)) { /* - * To avoid a race we must keep the job around - * until after the notify function has completed. - * Otherwise the client may try and stop the job - * after we've completed. + * Queue the completion callback to the kcopyd thread. + * + * Some callers assume that all the completions are called + * from a single thread and don't race with each other. + * + * We must not call the callback directly here because this + * code may not be executing in the thread. */ - job->fn(read_err, write_err, job->context); - mempool_free(job, job->kc->job_pool); + push(&kc->complete_jobs, job); + wake(kc); } } @@ -528,6 +533,8 @@ static void split_job(struct kcopyd_job *job) { int i; + atomic_inc(&job->kc->nr_jobs); + atomic_set(&job->sub_jobs, SPLIT_COUNT); for (i = 0; i < SPLIT_COUNT; i++) segment_complete(0, 0u, job); diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c index bfa107f59d9..79fb53e51c7 100644 --- a/drivers/md/dm-linear.c +++ b/drivers/md/dm-linear.c @@ -142,7 +142,6 @@ static struct target_type linear_target = { .status = linear_status, .ioctl = linear_ioctl, .merge = linear_merge, - .features = DM_TARGET_SUPPORTS_BARRIERS, }; int __init dm_linear_init(void) diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 095f77bf968..6a386ab4f7e 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -8,7 +8,6 @@ #include <linux/device-mapper.h> #include "dm-path-selector.h" -#include "dm-bio-list.h" #include "dm-bio-record.h" #include "dm-uevent.h" diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index 536ef0bef15..076fbb4e967 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -5,7 +5,6 @@ * This file is released under the GPL. 
*/ -#include "dm-bio-list.h" #include "dm-bio-record.h" #include <linux/init.h> diff --git a/drivers/md/dm-region-hash.c b/drivers/md/dm-region-hash.c index 59f8d9df9e1..7b899be0b08 100644 --- a/drivers/md/dm-region-hash.c +++ b/drivers/md/dm-region-hash.c @@ -14,7 +14,6 @@ #include <linux/vmalloc.h> #include "dm.h" -#include "dm-bio-list.h" #define DM_MSG_PREFIX "region hash" diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 981a0413068..d73f17fc777 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -22,7 +22,6 @@ #include <linux/workqueue.h> #include "dm-exception-store.h" -#include "dm-bio-list.h" #define DM_MSG_PREFIX "snapshots" diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index e8361b191b9..429b50b975d 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -52,8 +52,6 @@ struct dm_table { sector_t *highs; struct dm_target *targets; - unsigned barriers_supported:1; - /* * Indicates the rw permissions for the new logical * device. This should be a combination of FMODE_READ @@ -243,7 +241,6 @@ int dm_table_create(struct dm_table **result, fmode_t mode, INIT_LIST_HEAD(&t->devices); atomic_set(&t->holders, 0); - t->barriers_supported = 1; if (!num_targets) num_targets = KEYS_PER_NODE; @@ -751,10 +748,6 @@ int dm_table_add_target(struct dm_table *t, const char *type, /* FIXME: the plan is to combine high here and then have * the merge fn apply the target level restrictions. */ combine_restrictions_low(&t->limits, &tgt->limits); - - if (!(tgt->type->features & DM_TARGET_SUPPORTS_BARRIERS)) - t->barriers_supported = 0; - return 0; bad: @@ -799,12 +792,6 @@ int dm_table_complete(struct dm_table *t) check_for_valid_limits(&t->limits); - /* - * We only support barriers if there is exactly one underlying device. - */ - if (!list_is_singular(&t->devices)) - t->barriers_supported = 0; - /* how many indexes will the btree have ? 
*/ leaf_nodes = dm_div_up(t->num_targets, KEYS_PER_NODE); t->depth = 1 + int_log(leaf_nodes, CHILDREN_PER_NODE); @@ -879,6 +866,45 @@ struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector) return &t->targets[(KEYS_PER_NODE * n) + k]; } +/* + * Set the integrity profile for this device if all devices used have + * matching profiles. + */ +static void dm_table_set_integrity(struct dm_table *t) +{ + struct list_head *devices = dm_table_get_devices(t); + struct dm_dev_internal *prev = NULL, *dd = NULL; + + if (!blk_get_integrity(dm_disk(t->md))) + return; + + list_for_each_entry(dd, devices, list) { + if (prev && + blk_integrity_compare(prev->dm_dev.bdev->bd_disk, + dd->dm_dev.bdev->bd_disk) < 0) { + DMWARN("%s: integrity not set: %s and %s mismatch", + dm_device_name(t->md), + prev->dm_dev.bdev->bd_disk->disk_name, + dd->dm_dev.bdev->bd_disk->disk_name); + goto no_integrity; + } + prev = dd; + } + + if (!prev || !bdev_get_integrity(prev->dm_dev.bdev)) + goto no_integrity; + + blk_integrity_register(dm_disk(t->md), + bdev_get_integrity(prev->dm_dev.bdev)); + + return; + +no_integrity: + blk_integrity_register(dm_disk(t->md), NULL); + + return; +} + void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q) { /* @@ -899,6 +925,7 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q) else queue_flag_set_unlocked(QUEUE_FLAG_CLUSTER, q); + dm_table_set_integrity(t); } unsigned int dm_table_get_num_targets(struct dm_table *t) @@ -1019,12 +1046,6 @@ struct mapped_device *dm_table_get_md(struct dm_table *t) return t->md; } -int dm_table_barrier_ok(struct dm_table *t) -{ - return t->barriers_supported; -} -EXPORT_SYMBOL(dm_table_barrier_ok); - EXPORT_SYMBOL(dm_vcalloc); EXPORT_SYMBOL(dm_get_device); EXPORT_SYMBOL(dm_put_device); diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 788ba96a625..424f7b048c3 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -6,7 +6,6 @@ */ #include "dm.h" -#include "dm-bio-list.h" 
#include "dm-uevent.h" #include <linux/init.h> @@ -89,12 +88,13 @@ union map_info *dm_get_mapinfo(struct bio *bio) /* * Bits for the md->flags field. */ -#define DMF_BLOCK_IO 0 +#define DMF_BLOCK_IO_FOR_SUSPEND 0 #define DMF_SUSPENDED 1 #define DMF_FROZEN 2 #define DMF_FREEING 3 #define DMF_DELETING 4 #define DMF_NOFLUSH_SUSPENDING 5 +#define DMF_QUEUE_IO_TO_THREAD 6 /* * Work processed by per-device workqueue. @@ -124,6 +124,11 @@ struct mapped_device { spinlock_t deferred_lock; /* + * An error from the barrier request currently being processed. + */ + int barrier_error; + + /* * Processing queue (flush/barriers) */ struct workqueue_struct *wq; @@ -424,6 +429,10 @@ static void end_io_acct(struct dm_io *io) part_stat_add(cpu, &dm_disk(md)->part0, ticks[rw], duration); part_stat_unlock(); + /* + * After this is decremented the bio must not be touched if it is + * a barrier. + */ dm_disk(md)->part0.in_flight = pending = atomic_dec_return(&md->pending); @@ -435,21 +444,18 @@ static void end_io_acct(struct dm_io *io) /* * Add the bio to the list of deferred io. */ -static int queue_io(struct mapped_device *md, struct bio *bio) +static void queue_io(struct mapped_device *md, struct bio *bio) { down_write(&md->io_lock); - if (!test_bit(DMF_BLOCK_IO, &md->flags)) { - up_write(&md->io_lock); - return 1; - } - spin_lock_irq(&md->deferred_lock); bio_list_add(&md->deferred, bio); spin_unlock_irq(&md->deferred_lock); + if (!test_and_set_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags)) + queue_work(md->wq, &md->work); + up_write(&md->io_lock); - return 0; /* deferred successfully */ } /* @@ -533,25 +539,35 @@ static void dec_pending(struct dm_io *io, int error) */ spin_lock_irqsave(&md->deferred_lock, flags); if (__noflush_suspending(md)) - bio_list_add(&md->deferred, io->bio); + bio_list_add_head(&md->deferred, io->bio); else /* noflush suspend was interrupted. 
*/ io->error = -EIO; spin_unlock_irqrestore(&md->deferred_lock, flags); } - end_io_acct(io); - io_error = io->error; bio = io->bio; - free_io(md, io); + if (bio_barrier(bio)) { + /* + * There can be just one barrier request so we use + * a per-device variable for error reporting. + * Note that you can't touch the bio after end_io_acct + */ + md->barrier_error = io_error; + end_io_acct(io); + } else { + end_io_acct(io); - if (io_error != DM_ENDIO_REQUEUE) { - trace_block_bio_complete(md->queue, bio); + if (io_error != DM_ENDIO_REQUEUE) { + trace_block_bio_complete(md->queue, bio); - bio_endio(bio, io_error); + bio_endio(bio, io_error); + } } + + free_io(md, io); } } @@ -693,13 +709,19 @@ static struct bio *split_bvec(struct bio *bio, sector_t sector, clone->bi_sector = sector; clone->bi_bdev = bio->bi_bdev; - clone->bi_rw = bio->bi_rw; + clone->bi_rw = bio->bi_rw & ~(1 << BIO_RW_BARRIER); clone->bi_vcnt = 1; clone->bi_size = to_bytes(len); clone->bi_io_vec->bv_offset = offset; clone->bi_io_vec->bv_len = clone->bi_size; clone->bi_flags |= 1 << BIO_CLONED; + if (bio_integrity(bio)) { + bio_integrity_clone(clone, bio, GFP_NOIO); + bio_integrity_trim(clone, + bio_sector_offset(bio, idx, offset), len); + } + return clone; } @@ -714,6 +736,7 @@ static struct bio *clone_bio(struct bio *bio, sector_t sector, clone = bio_alloc_bioset(GFP_NOIO, bio->bi_max_vecs, bs); __bio_clone(clone, bio); + clone->bi_rw &= ~(1 << BIO_RW_BARRIER); clone->bi_destructor = dm_bio_destructor; clone->bi_sector = sector; clone->bi_idx = idx; @@ -721,6 +744,14 @@ static struct bio *clone_bio(struct bio *bio, sector_t sector, clone->bi_size = to_bytes(len); clone->bi_flags &= ~(1 << BIO_SEG_VALID); + if (bio_integrity(bio)) { + bio_integrity_clone(clone, bio, GFP_NOIO); + + if (idx != bio->bi_idx || clone->bi_size < bio->bi_size) + bio_integrity_trim(clone, + bio_sector_offset(bio, idx, 0), len); + } + return clone; } @@ -834,14 +865,13 @@ static void __split_and_process_bio(struct mapped_device 
*md, struct bio *bio) ci.map = dm_get_table(md); if (unlikely(!ci.map)) { - bio_io_error(bio); - return; - } - if (unlikely(bio_barrier(bio) && !dm_table_barrier_ok(ci.map))) { - dm_table_put(ci.map); - bio_endio(bio, -EOPNOTSUPP); + if (!bio_barrier(bio)) + bio_io_error(bio); + else + md->barrier_error = -EIO; return; } + ci.md = md; ci.bio = bio; ci.io = alloc_io(md); @@ -918,7 +948,6 @@ out: */ static int dm_request(struct request_queue *q, struct bio *bio) { - int r = -EIO; int rw = bio_data_dir(bio); struct mapped_device *md = q->queuedata; int cpu; @@ -931,34 +960,27 @@ static int dm_request(struct request_queue *q, struct bio *bio) part_stat_unlock(); /* - * If we're suspended we have to queue - * this io for later. + * If we're suspended or the thread is processing barriers + * we have to queue this io for later. */ - while (test_bit(DMF_BLOCK_IO, &md->flags)) { + if (unlikely(test_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags)) || + unlikely(bio_barrier(bio))) { up_read(&md->io_lock); - if (bio_rw(bio) != READA) - r = queue_io(md, bio); + if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) && + bio_rw(bio) == READA) { + bio_io_error(bio); + return 0; + } - if (r <= 0) - goto out_req; + queue_io(md, bio); - /* - * We're in a while loop, because someone could suspend - * before we get to the following read lock. 
- */ - down_read(&md->io_lock); + return 0; } __split_and_process_bio(md, bio); up_read(&md->io_lock); return 0; - -out_req: - if (r < 0) - bio_io_error(bio); - - return 0; } static void dm_unplug_all(struct request_queue *q) @@ -978,7 +1000,7 @@ static int dm_any_congested(void *congested_data, int bdi_bits) struct mapped_device *md = congested_data; struct dm_table *map; - if (!test_bit(DMF_BLOCK_IO, &md->flags)) { + if (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) { map = dm_get_table(md); if (map) { r = dm_table_any_congested(map, bdi_bits); @@ -1193,6 +1215,7 @@ static void free_dev(struct mapped_device *md) mempool_destroy(md->tio_pool); mempool_destroy(md->io_pool); bioset_free(md->bs); + blk_integrity_unregister(md->disk); del_gendisk(md->disk); free_minor(minor); @@ -1406,6 +1429,36 @@ static int dm_wait_for_completion(struct mapped_device *md, int interruptible) return r; } +static int dm_flush(struct mapped_device *md) +{ + dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE); + return 0; +} + +static void process_barrier(struct mapped_device *md, struct bio *bio) +{ + int error = dm_flush(md); + + if (unlikely(error)) { + bio_endio(bio, error); + return; + } + if (bio_empty_barrier(bio)) { + bio_endio(bio, 0); + return; + } + + __split_and_process_bio(md, bio); + + error = dm_flush(md); + + if (!error && md->barrier_error) + error = md->barrier_error; + + if (md->barrier_error != DM_ENDIO_REQUEUE) + bio_endio(bio, error); +} + /* * Process the deferred bios */ @@ -1417,25 +1470,34 @@ static void dm_wq_work(struct work_struct *work) down_write(&md->io_lock); -next_bio: - spin_lock_irq(&md->deferred_lock); - c = bio_list_pop(&md->deferred); - spin_unlock_irq(&md->deferred_lock); + while (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) { + spin_lock_irq(&md->deferred_lock); + c = bio_list_pop(&md->deferred); + spin_unlock_irq(&md->deferred_lock); - if (c) { - __split_and_process_bio(md, c); - goto next_bio; - } + if (!c) { + 
clear_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags); + break; + } - clear_bit(DMF_BLOCK_IO, &md->flags); + up_write(&md->io_lock); + + if (bio_barrier(c)) + process_barrier(md, c); + else + __split_and_process_bio(md, c); + + down_write(&md->io_lock); + } up_write(&md->io_lock); } static void dm_queue_flush(struct mapped_device *md) { + clear_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags); + smp_mb__after_clear_bit(); queue_work(md->wq, &md->work); - flush_workqueue(md->wq); } /* @@ -1553,20 +1615,36 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags) } /* - * First we set the BLOCK_IO flag so no more ios will be mapped. + * Here we must make sure that no processes are submitting requests + * to target drivers i.e. no one may be executing + * __split_and_process_bio. This is called from dm_request and + * dm_wq_work. + * + * To get all processes out of __split_and_process_bio in dm_request, + * we take the write lock. To prevent any process from reentering + * __split_and_process_bio from dm_request, we set + * DMF_QUEUE_IO_TO_THREAD. + * + * To quiesce the thread (dm_wq_work), we set DMF_BLOCK_IO_FOR_SUSPEND + * and call flush_workqueue(md->wq). flush_workqueue will wait until + * dm_wq_work exits and DMF_BLOCK_IO_FOR_SUSPEND will prevent any + * further calls to __split_and_process_bio from dm_wq_work. */ down_write(&md->io_lock); - set_bit(DMF_BLOCK_IO, &md->flags); - + set_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags); + set_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags); up_write(&md->io_lock); + flush_workqueue(md->wq); + /* - * Wait for the already-mapped ios to complete. + * At this point no more requests are entering target request routines. + * We call dm_wait_for_completion to wait for all existing requests + * to finish. 
*/ r = dm_wait_for_completion(md, TASK_INTERRUPTIBLE); down_write(&md->io_lock); - if (noflush) clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags); up_write(&md->io_lock); @@ -1579,6 +1657,12 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags) goto out; /* pushback list is already flushed, so skip flush */ } + /* + * If dm_wait_for_completion returned 0, the device is completely + * quiescent now. There is no request-processing activity. All new + * requests are being added to md->deferred list. + */ + dm_table_postsuspend_targets(map); set_bit(DMF_SUSPENDED, &md->flags); diff --git a/drivers/md/dm.h b/drivers/md/dm.h index b48397c0abb..a31506d93e9 100644 --- a/drivers/md/dm.h +++ b/drivers/md/dm.h @@ -52,7 +52,6 @@ int dm_table_any_congested(struct dm_table *t, int bdi_bits); * To check the return value from dm_table_find_target(). */ #define dm_target_is_valid(t) ((t)->table) -int dm_table_barrier_ok(struct dm_table *t); /*----------------------------------------------------------------- * A registry of target types. 
diff --git a/drivers/md/md.c b/drivers/md/md.c index ed5727c089a..612343fdde9 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -2017,6 +2017,8 @@ repeat: clear_bit(MD_CHANGE_PENDING, &mddev->flags); spin_unlock_irq(&mddev->write_lock); wake_up(&mddev->sb_wait); + if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) + sysfs_notify(&mddev->kobj, NULL, "sync_completed"); } @@ -2086,6 +2088,7 @@ state_store(mdk_rdev_t *rdev, const char *buf, size_t len) * -writemostly - clears write_mostly * blocked - sets the Blocked flag * -blocked - clears the Blocked flag + * insync - sets Insync providing device isn't active */ int err = -EINVAL; if (cmd_match(buf, "faulty") && rdev->mddev->pers) { @@ -2118,6 +2121,9 @@ state_store(mdk_rdev_t *rdev, const char *buf, size_t len) md_wakeup_thread(rdev->mddev->thread); err = 0; + } else if (cmd_match(buf, "insync") && rdev->raid_disk == -1) { + set_bit(In_sync, &rdev->flags); + err = 0; } if (!err && rdev->sysfs_state) sysfs_notify_dirent(rdev->sysfs_state); @@ -2190,7 +2196,7 @@ slot_store(mdk_rdev_t *rdev, const char *buf, size_t len) } else if (rdev->mddev->pers) { mdk_rdev_t *rdev2; /* Activating a spare .. or possibly reactivating - * if we every get bitmaps working here. + * if we ever get bitmaps working here. 
*/ if (rdev->raid_disk != -1) @@ -3482,12 +3488,15 @@ sync_completed_show(mddev_t *mddev, char *page) { unsigned long max_sectors, resync; + if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) + return sprintf(page, "none\n"); + if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) max_sectors = mddev->resync_max_sectors; else max_sectors = mddev->dev_sectors; - resync = (mddev->curr_resync - atomic_read(&mddev->recovery_active)); + resync = mddev->curr_resync_completed; return sprintf(page, "%lu / %lu\n", resync, max_sectors); } @@ -6334,18 +6343,13 @@ void md_do_sync(mddev_t *mddev) sector_t sectors; skipped = 0; - if (j >= mddev->resync_max) { - sysfs_notify(&mddev->kobj, NULL, "sync_completed"); - wait_event(mddev->recovery_wait, - mddev->resync_max > j - || kthread_should_stop()); - } - if (kthread_should_stop()) - goto interrupted; - if (mddev->curr_resync > mddev->curr_resync_completed && - (mddev->curr_resync - mddev->curr_resync_completed) - > (max_sectors >> 4)) { + if ((mddev->curr_resync > mddev->curr_resync_completed && + (mddev->curr_resync - mddev->curr_resync_completed) + > (max_sectors >> 4)) || + (j - mddev->curr_resync_completed)*2 + >= mddev->resync_max - mddev->curr_resync_completed + ) { /* time to update curr_resync_completed */ blk_unplug(mddev->queue); wait_event(mddev->recovery_wait, @@ -6353,7 +6357,17 @@ void md_do_sync(mddev_t *mddev) mddev->curr_resync_completed = mddev->curr_resync; set_bit(MD_CHANGE_CLEAN, &mddev->flags); + sysfs_notify(&mddev->kobj, NULL, "sync_completed"); } + + if (j >= mddev->resync_max) + wait_event(mddev->recovery_wait, + mddev->resync_max > j + || kthread_should_stop()); + + if (kthread_should_stop()) + goto interrupted; + sectors = mddev->pers->sync_request(mddev, j, &skipped, currspeed < speed_min(mddev)); if (sectors == 0) { @@ -6461,6 +6475,7 @@ void md_do_sync(mddev_t *mddev) skip: mddev->curr_resync = 0; + mddev->curr_resync_completed = 0; mddev->resync_min = 0; mddev->resync_max = MaxSector; 
sysfs_notify(&mddev->kobj, NULL, "sync_completed"); diff --git a/drivers/md/md.h b/drivers/md/md.h index e9b7f54c24d..8227ab909d4 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -12,10 +12,17 @@ Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ -#ifndef _MD_K_H -#define _MD_K_H - -#ifdef CONFIG_BLOCK +#ifndef _MD_MD_H +#define _MD_MD_H + +#include <linux/blkdev.h> +#include <linux/kobject.h> +#include <linux/list.h> +#include <linux/mm.h> +#include <linux/mutex.h> +#include <linux/timer.h> +#include <linux/wait.h> +#include <linux/workqueue.h> #define MaxSector (~(sector_t)0) @@ -408,10 +415,6 @@ static inline void safe_put_page(struct page *p) if (p) put_page(p); } -#endif /* CONFIG_BLOCK */ -#endif - - extern int register_md_personality(struct mdk_personality *p); extern int unregister_md_personality(struct mdk_personality *p); extern mdk_thread_t * md_register_thread(void (*run) (mddev_t *mddev), @@ -434,3 +437,5 @@ extern void md_new_event(mddev_t *mddev); extern int md_allow_write(mddev_t *mddev); extern void md_wait_for_blocked_rdev(mdk_rdev_t *rdev, mddev_t *mddev); extern void md_set_array_sectors(mddev_t *mddev, sector_t array_sectors); + +#endif /* _MD_MD_H */ diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 274b491a11c..36df9109cde 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -35,7 +35,6 @@ #include <linux/blkdev.h> #include <linux/seq_file.h> #include "md.h" -#include "dm-bio-list.h" #include "raid1.h" #include "bitmap.h" diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index e293d92641a..81a54f17417 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -22,7 +22,6 @@ #include <linux/blkdev.h> #include <linux/seq_file.h> #include "md.h" -#include "dm-bio-list.h" #include "raid10.h" #include "bitmap.h" diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 3bbc6d64704..4616bc3a6e7 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -3845,6 +3845,7 @@ static sector_t 
reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped wait_event(conf->wait_for_overlap, atomic_read(&conf->reshape_stripes)==0); mddev->reshape_position = conf->reshape_progress; + mddev->curr_resync_completed = mddev->curr_resync; conf->reshape_checkpoint = jiffies; set_bit(MD_CHANGE_DEVS, &mddev->flags); md_wakeup_thread(mddev->thread); @@ -3854,6 +3855,7 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped conf->reshape_safe = mddev->reshape_position; spin_unlock_irq(&conf->device_lock); wake_up(&conf->wait_for_overlap); + sysfs_notify(&mddev->kobj, NULL, "sync_completed"); } if (mddev->delta_disks < 0) { @@ -3938,11 +3940,13 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped * then we need to write out the superblock. */ sector_nr += reshape_sectors; - if (sector_nr >= mddev->resync_max) { + if ((sector_nr - mddev->curr_resync_completed) * 2 + >= mddev->resync_max - mddev->curr_resync_completed) { /* Cannot proceed until we've updated the superblock... */ wait_event(conf->wait_for_overlap, atomic_read(&conf->reshape_stripes) == 0); mddev->reshape_position = conf->reshape_progress; + mddev->curr_resync_completed = mddev->curr_resync; conf->reshape_checkpoint = jiffies; set_bit(MD_CHANGE_DEVS, &mddev->flags); md_wakeup_thread(mddev->thread); @@ -3953,6 +3957,7 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped conf->reshape_safe = mddev->reshape_position; spin_unlock_irq(&conf->device_lock); wake_up(&conf->wait_for_overlap); + sysfs_notify(&mddev->kobj, NULL, "sync_completed"); } return reshape_sectors; } |