diff options
Diffstat (limited to 'drivers/md/dm.c')
-rw-r--r-- | drivers/md/dm.c | 297 |
1 files changed, 175 insertions, 122 deletions
diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 8d40f27cce8..8a994be035b 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -89,29 +89,20 @@ union map_info *dm_get_mapinfo(struct bio *bio) /* * Bits for the md->flags field. */ -#define DMF_BLOCK_IO 0 +#define DMF_BLOCK_IO_FOR_SUSPEND 0 #define DMF_SUSPENDED 1 #define DMF_FROZEN 2 #define DMF_FREEING 3 #define DMF_DELETING 4 #define DMF_NOFLUSH_SUSPENDING 5 +#define DMF_QUEUE_IO_TO_THREAD 6 /* * Work processed by per-device workqueue. */ -struct dm_wq_req { - enum { - DM_WQ_FLUSH_DEFERRED, - } type; - struct work_struct work; - struct mapped_device *md; - void *context; -}; - struct mapped_device { struct rw_semaphore io_lock; struct mutex suspend_lock; - spinlock_t pushback_lock; rwlock_t map_lock; atomic_t holders; atomic_t open_count; @@ -129,8 +120,14 @@ struct mapped_device { */ atomic_t pending; wait_queue_head_t wait; + struct work_struct work; struct bio_list deferred; - struct bio_list pushback; + spinlock_t deferred_lock; + + /* + * An error from the barrier request currently being processed. + */ + int barrier_error; /* * Processing queue (flush/barriers) @@ -433,6 +430,10 @@ static void end_io_acct(struct dm_io *io) part_stat_add(cpu, &dm_disk(md)->part0, ticks[rw], duration); part_stat_unlock(); + /* + * After this is decremented the bio must not be touched if it is + * a barrier. + */ dm_disk(md)->part0.in_flight = pending = atomic_dec_return(&md->pending); @@ -444,19 +445,18 @@ static void end_io_acct(struct dm_io *io) /* * Add the bio to the list of deferred io. */ -static int queue_io(struct mapped_device *md, struct bio *bio) +static void queue_io(struct mapped_device *md, struct bio *bio) { down_write(&md->io_lock); - if (!test_bit(DMF_BLOCK_IO, &md->flags)) { - up_write(&md->io_lock); - return 1; - } - + spin_lock_irq(&md->deferred_lock); bio_list_add(&md->deferred, bio); + spin_unlock_irq(&md->deferred_lock); + + if (!test_and_set_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags)) + queue_work(md->wq, &md->work); up_write(&md->io_lock); - return 0; /* deferred successfully */ } /* @@ -537,30 +537,38 @@ static void dec_pending(struct dm_io *io, int error) if (io->error == DM_ENDIO_REQUEUE) { /* * Target requested pushing back the I/O. - * This must be handled before the sleeper on - * suspend queue merges the pushback list. */ - spin_lock_irqsave(&md->pushback_lock, flags); + spin_lock_irqsave(&md->deferred_lock, flags); if (__noflush_suspending(md)) - bio_list_add(&md->pushback, io->bio); + bio_list_add_head(&md->deferred, io->bio); else /* noflush suspend was interrupted. */ io->error = -EIO; - spin_unlock_irqrestore(&md->pushback_lock, flags); + spin_unlock_irqrestore(&md->deferred_lock, flags); } - end_io_acct(io); - io_error = io->error; bio = io->bio; - free_io(md, io); + if (bio_barrier(bio)) { + /* + * There can be just one barrier request so we use + * a per-device variable for error reporting. + * Note that you can't touch the bio after end_io_acct + */ + md->barrier_error = io_error; + end_io_acct(io); + } else { + end_io_acct(io); - if (io_error != DM_ENDIO_REQUEUE) { - trace_block_bio_complete(md->queue, bio); + if (io_error != DM_ENDIO_REQUEUE) { + trace_block_bio_complete(md->queue, bio); - bio_endio(bio, io_error); + bio_endio(bio, io_error); + } } + + free_io(md, io); } } @@ -702,13 +710,19 @@ static struct bio *split_bvec(struct bio *bio, sector_t sector, clone->bi_sector = sector; clone->bi_bdev = bio->bi_bdev; - clone->bi_rw = bio->bi_rw; + clone->bi_rw = bio->bi_rw & ~(1 << BIO_RW_BARRIER); clone->bi_vcnt = 1; clone->bi_size = to_bytes(len); clone->bi_io_vec->bv_offset = offset; clone->bi_io_vec->bv_len = clone->bi_size; clone->bi_flags |= 1 << BIO_CLONED; + if (bio_integrity(bio)) { + bio_integrity_clone(clone, bio, GFP_NOIO); + bio_integrity_trim(clone, + bio_sector_offset(bio, idx, offset), len); + } + return clone; } @@ -723,6 +737,7 @@ static struct bio *clone_bio(struct bio *bio, sector_t sector, clone = bio_alloc_bioset(GFP_NOIO, bio->bi_max_vecs, bs); __bio_clone(clone, bio); + clone->bi_rw &= ~(1 << BIO_RW_BARRIER); clone->bi_destructor = dm_bio_destructor; clone->bi_sector = sector; clone->bi_idx = idx; @@ -730,6 +745,14 @@ static struct bio *clone_bio(struct bio *bio, sector_t sector, clone->bi_size = to_bytes(len); clone->bi_flags &= ~(1 << BIO_SEG_VALID); + if (bio_integrity(bio)) { + bio_integrity_clone(clone, bio, GFP_NOIO); + + if (idx != bio->bi_idx || clone->bi_size < bio->bi_size) + bio_integrity_trim(clone, + bio_sector_offset(bio, idx, 0), len); + } + return clone; } @@ -834,21 +857,22 @@ static int __clone_and_map(struct clone_info *ci) } /* - * Split the bio into several clones. + * Split the bio into several clones and submit it to targets. */ -static int __split_bio(struct mapped_device *md, struct bio *bio) +static void __split_and_process_bio(struct mapped_device *md, struct bio *bio) { struct clone_info ci; int error = 0; ci.map = dm_get_table(md); - if (unlikely(!ci.map)) - return -EIO; - if (unlikely(bio_barrier(bio) && !dm_table_barrier_ok(ci.map))) { - dm_table_put(ci.map); - bio_endio(bio, -EOPNOTSUPP); - return 0; + if (unlikely(!ci.map)) { + if (!bio_barrier(bio)) + bio_io_error(bio); + else + md->barrier_error = -EIO; + return; } + ci.md = md; ci.bio = bio; ci.io = alloc_io(md); @@ -867,8 +891,6 @@ static int __split_bio(struct mapped_device *md, struct bio *bio) /* drop the extra reference count */ dec_pending(ci.io, error); dm_table_put(ci.map); - - return 0; } /*----------------------------------------------------------------- * CRUD END @@ -927,7 +949,6 @@ out: */ static int dm_request(struct request_queue *q, struct bio *bio) { - int r = -EIO; int rw = bio_data_dir(bio); struct mapped_device *md = q->queuedata; int cpu; @@ -940,32 +961,26 @@ static int dm_request(struct request_queue *q, struct bio *bio) part_stat_unlock(); /* - * If we're suspended we have to queue - * this io for later. + * If we're suspended or the thread is processing barriers + * we have to queue this io for later. */ - while (test_bit(DMF_BLOCK_IO, &md->flags)) { + if (unlikely(test_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags)) || + unlikely(bio_barrier(bio))) { up_read(&md->io_lock); - if (bio_rw(bio) != READA) - r = queue_io(md, bio); + if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) && + bio_rw(bio) == READA) { + bio_io_error(bio); + return 0; + } - if (r <= 0) - goto out_req; + queue_io(md, bio); - /* - * We're in a while loop, because someone could suspend - * before we get to the following read lock. - */ - down_read(&md->io_lock); + return 0; } - r = __split_bio(md, bio); + __split_and_process_bio(md, bio); up_read(&md->io_lock); - -out_req: - if (r < 0) - bio_io_error(bio); - return 0; } @@ -986,7 +1001,7 @@ static int dm_any_congested(void *congested_data, int bdi_bits) struct mapped_device *md = congested_data; struct dm_table *map; - if (!test_bit(DMF_BLOCK_IO, &md->flags)) { + if (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) { map = dm_get_table(md); if (map) { r = dm_table_any_congested(map, bdi_bits); @@ -1074,6 +1089,8 @@ out: static struct block_device_operations dm_blk_dops; +static void dm_wq_work(struct work_struct *work); + /* * Allocate and initialise a blank device with a given minor. */ @@ -1101,7 +1118,7 @@ static struct mapped_device *alloc_dev(int minor) init_rwsem(&md->io_lock); mutex_init(&md->suspend_lock); - spin_lock_init(&md->pushback_lock); + spin_lock_init(&md->deferred_lock); rwlock_init(&md->map_lock); atomic_set(&md->holders, 1); atomic_set(&md->open_count, 0); @@ -1118,6 +1135,7 @@ static struct mapped_device *alloc_dev(int minor) md->queue->backing_dev_info.congested_fn = dm_any_congested; md->queue->backing_dev_info.congested_data = md; blk_queue_make_request(md->queue, dm_request); + blk_queue_ordered(md->queue, QUEUE_ORDERED_DRAIN, NULL); blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY); md->queue->unplug_fn = dm_unplug_all; blk_queue_merge_bvec(md->queue, dm_merge_bvec); @@ -1140,6 +1158,7 @@ static struct mapped_device *alloc_dev(int minor) atomic_set(&md->pending, 0); init_waitqueue_head(&md->wait); + INIT_WORK(&md->work, dm_wq_work); init_waitqueue_head(&md->eventq); md->disk->major = _major; @@ -1197,6 +1216,7 @@ static void free_dev(struct mapped_device *md) mempool_destroy(md->tio_pool); mempool_destroy(md->io_pool); bioset_free(md->bs); + blk_integrity_unregister(md->disk); del_gendisk(md->disk); free_minor(minor); @@ -1379,18 +1399,24 @@ void dm_put(struct mapped_device *md) } EXPORT_SYMBOL_GPL(dm_put); -static int dm_wait_for_completion(struct mapped_device *md) +static int dm_wait_for_completion(struct mapped_device *md, int interruptible) { int r = 0; + DECLARE_WAITQUEUE(wait, current); + + dm_unplug_all(md->queue); + + add_wait_queue(&md->wait, &wait); while (1) { - set_current_state(TASK_INTERRUPTIBLE); + set_current_state(interruptible); smp_mb(); if (!atomic_read(&md->pending)) break; - if (signal_pending(current)) { + if (interruptible == TASK_INTERRUPTIBLE && + signal_pending(current)) { r = -EINTR; break; } @@ -1399,68 +1425,80 @@ static int dm_wait_for_completion(struct mapped_device *md) } set_current_state(TASK_RUNNING); + remove_wait_queue(&md->wait, &wait); + return r; } -/* - * Process the deferred bios - */ -static void __flush_deferred_io(struct mapped_device *md) +static int dm_flush(struct mapped_device *md) { - struct bio *c; + dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE); + return 0; +} + +static void process_barrier(struct mapped_device *md, struct bio *bio) +{ + int error = dm_flush(md); - while ((c = bio_list_pop(&md->deferred))) { - if (__split_bio(md, c)) - bio_io_error(c); + if (unlikely(error)) { + bio_endio(bio, error); + return; + } + if (bio_empty_barrier(bio)) { + bio_endio(bio, 0); + return; } - clear_bit(DMF_BLOCK_IO, &md->flags); -} + __split_and_process_bio(md, bio); -static void __merge_pushback_list(struct mapped_device *md) -{ - unsigned long flags; + error = dm_flush(md); - spin_lock_irqsave(&md->pushback_lock, flags); - clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags); - bio_list_merge_head(&md->deferred, &md->pushback); - bio_list_init(&md->pushback); - spin_unlock_irqrestore(&md->pushback_lock, flags); + if (!error && md->barrier_error) + error = md->barrier_error; + + if (md->barrier_error != DM_ENDIO_REQUEUE) + bio_endio(bio, error); } +/* + * Process the deferred bios + */ static void dm_wq_work(struct work_struct *work) { - struct dm_wq_req *req = container_of(work, struct dm_wq_req, work); - struct mapped_device *md = req->md; + struct mapped_device *md = container_of(work, struct mapped_device, + work); + struct bio *c; down_write(&md->io_lock); - switch (req->type) { - case DM_WQ_FLUSH_DEFERRED: - __flush_deferred_io(md); - break; - default: - DMERR("dm_wq_work: unrecognised work type %d", req->type); - BUG(); + + while (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) { + spin_lock_irq(&md->deferred_lock); + c = bio_list_pop(&md->deferred); + spin_unlock_irq(&md->deferred_lock); + + if (!c) { + clear_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags); + break; + } + + up_write(&md->io_lock); + + if (bio_barrier(c)) + process_barrier(md, c); + else + __split_and_process_bio(md, c); + + down_write(&md->io_lock); } - up_write(&md->io_lock); -} -static void dm_wq_queue(struct mapped_device *md, int type, void *context, - struct dm_wq_req *req) -{ - req->type = type; - req->md = md; - req->context = context; - INIT_WORK(&req->work, dm_wq_work); - queue_work(md->wq, &req->work); + up_write(&md->io_lock); } -static void dm_queue_flush(struct mapped_device *md, int type, void *context) +static void dm_queue_flush(struct mapped_device *md) { - struct dm_wq_req req; - - dm_wq_queue(md, type, context, &req); - flush_workqueue(md->wq); + clear_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags); + smp_mb__after_clear_bit(); + queue_work(md->wq, &md->work); } /* @@ -1534,7 +1572,6 @@ static void unlock_fs(struct mapped_device *md) int dm_suspend(struct mapped_device *md, unsigned suspend_flags) { struct dm_table *map = NULL; - DECLARE_WAITQUEUE(wait, current); int r = 0; int do_lockfs = suspend_flags & DM_SUSPEND_LOCKFS_FLAG ? 1 : 0; int noflush = suspend_flags & DM_SUSPEND_NOFLUSH_FLAG ? 1 : 0; @@ -1579,38 +1616,54 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags) } /* - * First we set the BLOCK_IO flag so no more ios will be mapped. + * Here we must make sure that no processes are submitting requests + * to target drivers i.e. no one may be executing + * __split_and_process_bio. This is called from dm_request and + * dm_wq_work. + * + * To get all processes out of __split_and_process_bio in dm_request, + * we take the write lock. To prevent any process from reentering + * __split_and_process_bio from dm_request, we set + * DMF_QUEUE_IO_TO_THREAD. + * + * To quiesce the thread (dm_wq_work), we set DMF_BLOCK_IO_FOR_SUSPEND + * and call flush_workqueue(md->wq). flush_workqueue will wait until + * dm_wq_work exits and DMF_BLOCK_IO_FOR_SUSPEND will prevent any + * further calls to __split_and_process_bio from dm_wq_work. */ down_write(&md->io_lock); - set_bit(DMF_BLOCK_IO, &md->flags); - - add_wait_queue(&md->wait, &wait); + set_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags); + set_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags); up_write(&md->io_lock); - /* unplug */ - if (map) - dm_table_unplug_all(map); + flush_workqueue(md->wq); /* - * Wait for the already-mapped ios to complete. + * At this point no more requests are entering target request routines. + * We call dm_wait_for_completion to wait for all existing requests + * to finish. */ - r = dm_wait_for_completion(md); + r = dm_wait_for_completion(md, TASK_INTERRUPTIBLE); down_write(&md->io_lock); - remove_wait_queue(&md->wait, &wait); - if (noflush) - __merge_pushback_list(md); + clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags); up_write(&md->io_lock); /* were we interrupted ? */ if (r < 0) { - dm_queue_flush(md, DM_WQ_FLUSH_DEFERRED, NULL); + dm_queue_flush(md); unlock_fs(md); goto out; /* pushback list is already flushed, so skip flush */ } + /* + * If dm_wait_for_completion returned 0, the device is completely + * quiescent now. There is no request-processing activity. All new + * requests are being added to md->deferred list. + */ + dm_table_postsuspend_targets(map); set_bit(DMF_SUSPENDED, &md->flags); @@ -1645,7 +1698,7 @@ int dm_resume(struct mapped_device *md) if (r) goto out; - dm_queue_flush(md, DM_WQ_FLUSH_DEFERRED, NULL); + dm_queue_flush(md); unlock_fs(md); |