about | summary | refs | log | tree | commit | diff
path: root/drivers/md/dm.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md/dm.c')
-rw-r--r-- drivers/md/dm.c 297
1 files changed, 175 insertions, 122 deletions
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 8d40f27cce8..8a994be035b 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -89,29 +89,20 @@ union map_info *dm_get_mapinfo(struct bio *bio)
/*
* Bits for the md->flags field.
*/
-#define DMF_BLOCK_IO 0
+#define DMF_BLOCK_IO_FOR_SUSPEND 0
#define DMF_SUSPENDED 1
#define DMF_FROZEN 2
#define DMF_FREEING 3
#define DMF_DELETING 4
#define DMF_NOFLUSH_SUSPENDING 5
+#define DMF_QUEUE_IO_TO_THREAD 6
/*
* Work processed by per-device workqueue.
*/
-struct dm_wq_req {
- enum {
- DM_WQ_FLUSH_DEFERRED,
- } type;
- struct work_struct work;
- struct mapped_device *md;
- void *context;
-};
-
struct mapped_device {
struct rw_semaphore io_lock;
struct mutex suspend_lock;
- spinlock_t pushback_lock;
rwlock_t map_lock;
atomic_t holders;
atomic_t open_count;
@@ -129,8 +120,14 @@ struct mapped_device {
*/
atomic_t pending;
wait_queue_head_t wait;
+ struct work_struct work;
struct bio_list deferred;
- struct bio_list pushback;
+ spinlock_t deferred_lock;
+
+ /*
+ * An error from the barrier request currently being processed.
+ */
+ int barrier_error;
/*
* Processing queue (flush/barriers)
@@ -433,6 +430,10 @@ static void end_io_acct(struct dm_io *io)
part_stat_add(cpu, &dm_disk(md)->part0, ticks[rw], duration);
part_stat_unlock();
+ /*
+ * After this is decremented the bio must not be touched if it is
+ * a barrier.
+ */
dm_disk(md)->part0.in_flight = pending =
atomic_dec_return(&md->pending);
@@ -444,19 +445,18 @@ static void end_io_acct(struct dm_io *io)
/*
* Add the bio to the list of deferred io.
*/
-static int queue_io(struct mapped_device *md, struct bio *bio)
+static void queue_io(struct mapped_device *md, struct bio *bio)
{
down_write(&md->io_lock);
- if (!test_bit(DMF_BLOCK_IO, &md->flags)) {
- up_write(&md->io_lock);
- return 1;
- }
-
+ spin_lock_irq(&md->deferred_lock);
bio_list_add(&md->deferred, bio);
+ spin_unlock_irq(&md->deferred_lock);
+
+ if (!test_and_set_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags))
+ queue_work(md->wq, &md->work);
up_write(&md->io_lock);
- return 0; /* deferred successfully */
}
/*
@@ -537,30 +537,38 @@ static void dec_pending(struct dm_io *io, int error)
if (io->error == DM_ENDIO_REQUEUE) {
/*
* Target requested pushing back the I/O.
- * This must be handled before the sleeper on
- * suspend queue merges the pushback list.
*/
- spin_lock_irqsave(&md->pushback_lock, flags);
+ spin_lock_irqsave(&md->deferred_lock, flags);
if (__noflush_suspending(md))
- bio_list_add(&md->pushback, io->bio);
+ bio_list_add_head(&md->deferred, io->bio);
else
/* noflush suspend was interrupted. */
io->error = -EIO;
- spin_unlock_irqrestore(&md->pushback_lock, flags);
+ spin_unlock_irqrestore(&md->deferred_lock, flags);
}
- end_io_acct(io);
-
io_error = io->error;
bio = io->bio;
- free_io(md, io);
+ if (bio_barrier(bio)) {
+ /*
+ * There can be just one barrier request so we use
+ * a per-device variable for error reporting.
+ * Note that you can't touch the bio after end_io_acct
+ */
+ md->barrier_error = io_error;
+ end_io_acct(io);
+ } else {
+ end_io_acct(io);
- if (io_error != DM_ENDIO_REQUEUE) {
- trace_block_bio_complete(md->queue, bio);
+ if (io_error != DM_ENDIO_REQUEUE) {
+ trace_block_bio_complete(md->queue, bio);
- bio_endio(bio, io_error);
+ bio_endio(bio, io_error);
+ }
}
+
+ free_io(md, io);
}
}
@@ -702,13 +710,19 @@ static struct bio *split_bvec(struct bio *bio, sector_t sector,
clone->bi_sector = sector;
clone->bi_bdev = bio->bi_bdev;
- clone->bi_rw = bio->bi_rw;
+ clone->bi_rw = bio->bi_rw & ~(1 << BIO_RW_BARRIER);
clone->bi_vcnt = 1;
clone->bi_size = to_bytes(len);
clone->bi_io_vec->bv_offset = offset;
clone->bi_io_vec->bv_len = clone->bi_size;
clone->bi_flags |= 1 << BIO_CLONED;
+ if (bio_integrity(bio)) {
+ bio_integrity_clone(clone, bio, GFP_NOIO);
+ bio_integrity_trim(clone,
+ bio_sector_offset(bio, idx, offset), len);
+ }
+
return clone;
}
@@ -723,6 +737,7 @@ static struct bio *clone_bio(struct bio *bio, sector_t sector,
clone = bio_alloc_bioset(GFP_NOIO, bio->bi_max_vecs, bs);
__bio_clone(clone, bio);
+ clone->bi_rw &= ~(1 << BIO_RW_BARRIER);
clone->bi_destructor = dm_bio_destructor;
clone->bi_sector = sector;
clone->bi_idx = idx;
@@ -730,6 +745,14 @@ static struct bio *clone_bio(struct bio *bio, sector_t sector,
clone->bi_size = to_bytes(len);
clone->bi_flags &= ~(1 << BIO_SEG_VALID);
+ if (bio_integrity(bio)) {
+ bio_integrity_clone(clone, bio, GFP_NOIO);
+
+ if (idx != bio->bi_idx || clone->bi_size < bio->bi_size)
+ bio_integrity_trim(clone,
+ bio_sector_offset(bio, idx, 0), len);
+ }
+
return clone;
}
@@ -834,21 +857,22 @@ static int __clone_and_map(struct clone_info *ci)
}
/*
- * Split the bio into several clones.
+ * Split the bio into several clones and submit it to targets.
*/
-static int __split_bio(struct mapped_device *md, struct bio *bio)
+static void __split_and_process_bio(struct mapped_device *md, struct bio *bio)
{
struct clone_info ci;
int error = 0;
ci.map = dm_get_table(md);
- if (unlikely(!ci.map))
- return -EIO;
- if (unlikely(bio_barrier(bio) && !dm_table_barrier_ok(ci.map))) {
- dm_table_put(ci.map);
- bio_endio(bio, -EOPNOTSUPP);
- return 0;
+ if (unlikely(!ci.map)) {
+ if (!bio_barrier(bio))
+ bio_io_error(bio);
+ else
+ md->barrier_error = -EIO;
+ return;
}
+
ci.md = md;
ci.bio = bio;
ci.io = alloc_io(md);
@@ -867,8 +891,6 @@ static int __split_bio(struct mapped_device *md, struct bio *bio)
/* drop the extra reference count */
dec_pending(ci.io, error);
dm_table_put(ci.map);
-
- return 0;
}
/*-----------------------------------------------------------------
* CRUD END
@@ -927,7 +949,6 @@ out:
*/
static int dm_request(struct request_queue *q, struct bio *bio)
{
- int r = -EIO;
int rw = bio_data_dir(bio);
struct mapped_device *md = q->queuedata;
int cpu;
@@ -940,32 +961,26 @@ static int dm_request(struct request_queue *q, struct bio *bio)
part_stat_unlock();
/*
- * If we're suspended we have to queue
- * this io for later.
+ * If we're suspended or the thread is processing barriers
+ * we have to queue this io for later.
*/
- while (test_bit(DMF_BLOCK_IO, &md->flags)) {
+ if (unlikely(test_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags)) ||
+ unlikely(bio_barrier(bio))) {
up_read(&md->io_lock);
- if (bio_rw(bio) != READA)
- r = queue_io(md, bio);
+ if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) &&
+ bio_rw(bio) == READA) {
+ bio_io_error(bio);
+ return 0;
+ }
- if (r <= 0)
- goto out_req;
+ queue_io(md, bio);
- /*
- * We're in a while loop, because someone could suspend
- * before we get to the following read lock.
- */
- down_read(&md->io_lock);
+ return 0;
}
- r = __split_bio(md, bio);
+ __split_and_process_bio(md, bio);
up_read(&md->io_lock);
-
-out_req:
- if (r < 0)
- bio_io_error(bio);
-
return 0;
}
@@ -986,7 +1001,7 @@ static int dm_any_congested(void *congested_data, int bdi_bits)
struct mapped_device *md = congested_data;
struct dm_table *map;
- if (!test_bit(DMF_BLOCK_IO, &md->flags)) {
+ if (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) {
map = dm_get_table(md);
if (map) {
r = dm_table_any_congested(map, bdi_bits);
@@ -1074,6 +1089,8 @@ out:
static struct block_device_operations dm_blk_dops;
+static void dm_wq_work(struct work_struct *work);
+
/*
* Allocate and initialise a blank device with a given minor.
*/
@@ -1101,7 +1118,7 @@ static struct mapped_device *alloc_dev(int minor)
init_rwsem(&md->io_lock);
mutex_init(&md->suspend_lock);
- spin_lock_init(&md->pushback_lock);
+ spin_lock_init(&md->deferred_lock);
rwlock_init(&md->map_lock);
atomic_set(&md->holders, 1);
atomic_set(&md->open_count, 0);
@@ -1118,6 +1135,7 @@ static struct mapped_device *alloc_dev(int minor)
md->queue->backing_dev_info.congested_fn = dm_any_congested;
md->queue->backing_dev_info.congested_data = md;
blk_queue_make_request(md->queue, dm_request);
+ blk_queue_ordered(md->queue, QUEUE_ORDERED_DRAIN, NULL);
blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY);
md->queue->unplug_fn = dm_unplug_all;
blk_queue_merge_bvec(md->queue, dm_merge_bvec);
@@ -1140,6 +1158,7 @@ static struct mapped_device *alloc_dev(int minor)
atomic_set(&md->pending, 0);
init_waitqueue_head(&md->wait);
+ INIT_WORK(&md->work, dm_wq_work);
init_waitqueue_head(&md->eventq);
md->disk->major = _major;
@@ -1197,6 +1216,7 @@ static void free_dev(struct mapped_device *md)
mempool_destroy(md->tio_pool);
mempool_destroy(md->io_pool);
bioset_free(md->bs);
+ blk_integrity_unregister(md->disk);
del_gendisk(md->disk);
free_minor(minor);
@@ -1379,18 +1399,24 @@ void dm_put(struct mapped_device *md)
}
EXPORT_SYMBOL_GPL(dm_put);
-static int dm_wait_for_completion(struct mapped_device *md)
+static int dm_wait_for_completion(struct mapped_device *md, int interruptible)
{
int r = 0;
+ DECLARE_WAITQUEUE(wait, current);
+
+ dm_unplug_all(md->queue);
+
+ add_wait_queue(&md->wait, &wait);
while (1) {
- set_current_state(TASK_INTERRUPTIBLE);
+ set_current_state(interruptible);
smp_mb();
if (!atomic_read(&md->pending))
break;
- if (signal_pending(current)) {
+ if (interruptible == TASK_INTERRUPTIBLE &&
+ signal_pending(current)) {
r = -EINTR;
break;
}
@@ -1399,68 +1425,80 @@ static int dm_wait_for_completion(struct mapped_device *md)
}
set_current_state(TASK_RUNNING);
+ remove_wait_queue(&md->wait, &wait);
+
return r;
}
-/*
- * Process the deferred bios
- */
-static void __flush_deferred_io(struct mapped_device *md)
+static int dm_flush(struct mapped_device *md)
{
- struct bio *c;
+ dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE);
+ return 0;
+}
+
+static void process_barrier(struct mapped_device *md, struct bio *bio)
+{
+ int error = dm_flush(md);
- while ((c = bio_list_pop(&md->deferred))) {
- if (__split_bio(md, c))
- bio_io_error(c);
+ if (unlikely(error)) {
+ bio_endio(bio, error);
+ return;
+ }
+ if (bio_empty_barrier(bio)) {
+ bio_endio(bio, 0);
+ return;
}
- clear_bit(DMF_BLOCK_IO, &md->flags);
-}
+ __split_and_process_bio(md, bio);
-static void __merge_pushback_list(struct mapped_device *md)
-{
- unsigned long flags;
+ error = dm_flush(md);
- spin_lock_irqsave(&md->pushback_lock, flags);
- clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
- bio_list_merge_head(&md->deferred, &md->pushback);
- bio_list_init(&md->pushback);
- spin_unlock_irqrestore(&md->pushback_lock, flags);
+ if (!error && md->barrier_error)
+ error = md->barrier_error;
+
+ if (md->barrier_error != DM_ENDIO_REQUEUE)
+ bio_endio(bio, error);
}
+/*
+ * Process the deferred bios
+ */
static void dm_wq_work(struct work_struct *work)
{
- struct dm_wq_req *req = container_of(work, struct dm_wq_req, work);
- struct mapped_device *md = req->md;
+ struct mapped_device *md = container_of(work, struct mapped_device,
+ work);
+ struct bio *c;
down_write(&md->io_lock);
- switch (req->type) {
- case DM_WQ_FLUSH_DEFERRED:
- __flush_deferred_io(md);
- break;
- default:
- DMERR("dm_wq_work: unrecognised work type %d", req->type);
- BUG();
+
+ while (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) {
+ spin_lock_irq(&md->deferred_lock);
+ c = bio_list_pop(&md->deferred);
+ spin_unlock_irq(&md->deferred_lock);
+
+ if (!c) {
+ clear_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags);
+ break;
+ }
+
+ up_write(&md->io_lock);
+
+ if (bio_barrier(c))
+ process_barrier(md, c);
+ else
+ __split_and_process_bio(md, c);
+
+ down_write(&md->io_lock);
}
- up_write(&md->io_lock);
-}
-static void dm_wq_queue(struct mapped_device *md, int type, void *context,
- struct dm_wq_req *req)
-{
- req->type = type;
- req->md = md;
- req->context = context;
- INIT_WORK(&req->work, dm_wq_work);
- queue_work(md->wq, &req->work);
+ up_write(&md->io_lock);
}
-static void dm_queue_flush(struct mapped_device *md, int type, void *context)
+static void dm_queue_flush(struct mapped_device *md)
{
- struct dm_wq_req req;
-
- dm_wq_queue(md, type, context, &req);
- flush_workqueue(md->wq);
+ clear_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags);
+ smp_mb__after_clear_bit();
+ queue_work(md->wq, &md->work);
}
/*
@@ -1534,7 +1572,6 @@ static void unlock_fs(struct mapped_device *md)
int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
{
struct dm_table *map = NULL;
- DECLARE_WAITQUEUE(wait, current);
int r = 0;
int do_lockfs = suspend_flags & DM_SUSPEND_LOCKFS_FLAG ? 1 : 0;
int noflush = suspend_flags & DM_SUSPEND_NOFLUSH_FLAG ? 1 : 0;
@@ -1579,38 +1616,54 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
}
/*
- * First we set the BLOCK_IO flag so no more ios will be mapped.
+ * Here we must make sure that no processes are submitting requests
+ * to target drivers i.e. no one may be executing
+ * __split_and_process_bio. This is called from dm_request and
+ * dm_wq_work.
+ *
+ * To get all processes out of __split_and_process_bio in dm_request,
+ * we take the write lock. To prevent any process from reentering
+ * __split_and_process_bio from dm_request, we set
+ * DMF_QUEUE_IO_TO_THREAD.
+ *
+ * To quiesce the thread (dm_wq_work), we set DMF_BLOCK_IO_FOR_SUSPEND
+ * and call flush_workqueue(md->wq). flush_workqueue will wait until
+ * dm_wq_work exits and DMF_BLOCK_IO_FOR_SUSPEND will prevent any
+ * further calls to __split_and_process_bio from dm_wq_work.
*/
down_write(&md->io_lock);
- set_bit(DMF_BLOCK_IO, &md->flags);
-
- add_wait_queue(&md->wait, &wait);
+ set_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags);
+ set_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags);
up_write(&md->io_lock);
- /* unplug */
- if (map)
- dm_table_unplug_all(map);
+ flush_workqueue(md->wq);
/*
- * Wait for the already-mapped ios to complete.
+ * At this point no more requests are entering target request routines.
+ * We call dm_wait_for_completion to wait for all existing requests
+ * to finish.
*/
- r = dm_wait_for_completion(md);
+ r = dm_wait_for_completion(md, TASK_INTERRUPTIBLE);
down_write(&md->io_lock);
- remove_wait_queue(&md->wait, &wait);
-
if (noflush)
- __merge_pushback_list(md);
+ clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
up_write(&md->io_lock);
/* were we interrupted ? */
if (r < 0) {
- dm_queue_flush(md, DM_WQ_FLUSH_DEFERRED, NULL);
+ dm_queue_flush(md);
unlock_fs(md);
goto out; /* pushback list is already flushed, so skip flush */
}
+ /*
+ * If dm_wait_for_completion returned 0, the device is completely
+ * quiescent now. There is no request-processing activity. All new
+ * requests are being added to md->deferred list.
+ */
+
dm_table_postsuspend_targets(map);
set_bit(DMF_SUSPENDED, &md->flags);
@@ -1645,7 +1698,7 @@ int dm_resume(struct mapped_device *md)
if (r)
goto out;
- dm_queue_flush(md, DM_WQ_FLUSH_DEFERRED, NULL);
+ dm_queue_flush(md);
unlock_fs(md);