Diffstat (limited to 'drivers/md/dm.c')
| -rw-r--r-- | drivers/md/dm.c | 143 |
1 files changed, 100 insertions, 43 deletions
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 44a2fa6814c..32b958dbc49 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -54,6 +54,8 @@ static void do_deferred_remove(struct work_struct *w);
 
 static DECLARE_WORK(deferred_remove_work, do_deferred_remove);
 
+static struct workqueue_struct *deferred_remove_workqueue;
+
 /*
  * For bio-based dm.
  * One of these is allocated per bio.
@@ -94,13 +96,6 @@ struct dm_rq_clone_bio_info {
 	struct bio clone;
 };
 
-union map_info *dm_get_mapinfo(struct bio *bio)
-{
-	if (bio && bio->bi_private)
-		return &((struct dm_target_io *)bio->bi_private)->info;
-	return NULL;
-}
-
 union map_info *dm_get_rq_mapinfo(struct request *rq)
 {
 	if (rq && rq->end_io_data)
@@ -200,8 +195,8 @@ struct mapped_device {
 	/* forced geometry settings */
 	struct hd_geometry geometry;
 
-	/* sysfs handle */
-	struct kobject kobj;
+	/* kobject and completion */
+	struct dm_kobject_holder kobj_holder;
 
 	/* zero-length flush that will be cloned and submitted to targets */
 	struct bio flush_bio;
@@ -283,16 +278,24 @@ static int __init local_init(void)
 	if (r)
 		goto out_free_rq_tio_cache;
 
+	deferred_remove_workqueue = alloc_workqueue("kdmremove", WQ_UNBOUND, 1);
+	if (!deferred_remove_workqueue) {
+		r = -ENOMEM;
+		goto out_uevent_exit;
+	}
+
 	_major = major;
 	r = register_blkdev(_major, _name);
 	if (r < 0)
-		goto out_uevent_exit;
+		goto out_free_workqueue;
 
 	if (!_major)
 		_major = r;
 
 	return 0;
 
+out_free_workqueue:
+	destroy_workqueue(deferred_remove_workqueue);
 out_uevent_exit:
 	dm_uevent_exit();
 out_free_rq_tio_cache:
@@ -306,6 +309,7 @@ out_free_io_cache:
 static void local_exit(void)
 {
 	flush_scheduled_work();
+	destroy_workqueue(deferred_remove_workqueue);
 
 	kmem_cache_destroy(_rq_tio_cache);
 	kmem_cache_destroy(_io_cache);
@@ -414,7 +418,7 @@ static void dm_blk_close(struct gendisk *disk, fmode_t mode)
 
 	if (atomic_dec_and_test(&md->open_count) &&
 	    (test_bit(DMF_DEFERRED_REMOVE, &md->flags)))
-		schedule_work(&deferred_remove_work);
+		queue_work(deferred_remove_workqueue, &deferred_remove_work);
 
 	dm_put(md);
 
@@ -475,6 +479,11 @@ sector_t dm_get_size(struct mapped_device *md)
 	return get_capacity(md->disk);
 }
 
+struct request_queue *dm_get_md_queue(struct mapped_device *md)
+{
+	return md->queue;
+}
+
 struct dm_stats *dm_get_stats(struct mapped_device *md)
 {
 	return &md->stats;
@@ -757,10 +766,18 @@ static void dec_pending(struct dm_io *io, int error)
 	}
 }
 
+static void disable_write_same(struct mapped_device *md)
+{
+	struct queue_limits *limits = dm_get_queue_limits(md);
+
+	/* device doesn't really support WRITE SAME, disable it */
+	limits->max_write_same_sectors = 0;
+}
+
 static void clone_endio(struct bio *bio, int error)
 {
 	int r = 0;
-	struct dm_target_io *tio = bio->bi_private;
+	struct dm_target_io *tio = container_of(bio, struct dm_target_io, clone);
 	struct dm_io *io = tio->io;
 	struct mapped_device *md = tio->io->md;
 	dm_endio_fn endio = tio->ti->type->end_io;
@@ -785,6 +802,10 @@ static void clone_endio(struct bio *bio, int error)
 		}
 	}
 
+	if (unlikely(r == -EREMOTEIO && (bio->bi_rw & REQ_WRITE_SAME) &&
+		     !bdev_get_queue(bio->bi_bdev)->limits.max_write_same_sectors))
+		disable_write_same(md);
+
 	free_tio(md, tio);
 	dec_pending(io, error);
 }
@@ -794,7 +815,8 @@ static void clone_endio(struct bio *bio, int error)
  */
 static void end_clone_bio(struct bio *clone, int error)
 {
-	struct dm_rq_clone_bio_info *info = clone->bi_private;
+	struct dm_rq_clone_bio_info *info =
+		container_of(clone, struct dm_rq_clone_bio_info, clone);
 	struct dm_rq_target_io *tio = info->tio;
 	struct bio *bio = info->orig;
 	unsigned int nr_bytes = info->orig->bi_iter.bi_size;
@@ -978,6 +1000,10 @@ static void dm_done(struct request *clone, int error, bool mapped)
 			r = rq_end_io(tio->ti, clone, error, &tio->info);
 	}
 
+	if (unlikely(r == -EREMOTEIO && (clone->cmd_flags & REQ_WRITE_SAME) &&
+		     !clone->q->limits.max_write_same_sectors))
+		disable_write_same(tio->md);
+
 	if (r <= 0)
 		/* The target wants to complete the I/O */
 		dm_end_request(clone, r);
@@ -1111,6 +1137,46 @@ int dm_set_target_max_io_len(struct dm_target *ti, sector_t len)
 }
 EXPORT_SYMBOL_GPL(dm_set_target_max_io_len);
 
+/*
+ * A target may call dm_accept_partial_bio only from the map routine.  It is
+ * allowed for all bio types except REQ_FLUSH.
+ *
+ * dm_accept_partial_bio informs the dm that the target only wants to process
+ * additional n_sectors sectors of the bio and the rest of the data should be
+ * sent in a next bio.
+ *
+ * A diagram that explains the arithmetics:
+ * +--------------------+---------------+-------+
+ * |         1          |       2       |   3   |
+ * +--------------------+---------------+-------+
+ *
+ * <-------------- *tio->len_ptr --------------->
+ *                      <------- bi_size ------->
+ *                      <-- n_sectors -->
+ *
+ * Region 1 was already iterated over with bio_advance or similar function.
+ *	(it may be empty if the target doesn't use bio_advance)
+ * Region 2 is the remaining bio size that the target wants to process.
+ *	(it may be empty if region 1 is non-empty, although there is no reason
+ *	 to make it empty)
+ * The target requires that region 3 is to be sent in the next bio.
+ *
+ * If the target wants to receive multiple copies of the bio (via num_*bios, etc),
+ * the partially processed part (the sum of regions 1+2) must be the same for all
+ * copies of the bio.
+ */
+void dm_accept_partial_bio(struct bio *bio, unsigned n_sectors)
+{
+	struct dm_target_io *tio = container_of(bio, struct dm_target_io, clone);
+	unsigned bi_size = bio->bi_iter.bi_size >> SECTOR_SHIFT;
+	BUG_ON(bio->bi_rw & REQ_FLUSH);
+	BUG_ON(bi_size > *tio->len_ptr);
+	BUG_ON(n_sectors > bi_size);
+	*tio->len_ptr -= bi_size - n_sectors;
+	bio->bi_iter.bi_size = n_sectors << SECTOR_SHIFT;
+}
+EXPORT_SYMBOL_GPL(dm_accept_partial_bio);
+
 static void __map_bio(struct dm_target_io *tio)
 {
 	int r;
@@ -1120,7 +1186,6 @@ static void __map_bio(struct dm_target_io *tio)
 	struct dm_target *ti = tio->ti;
 
 	clone->bi_end_io = clone_endio;
-	clone->bi_private = tio;
 
 	/*
 	 * Map the clone.  If r == 0 we don't need to do
@@ -1154,10 +1219,10 @@ struct clone_info {
 	struct bio *bio;
 	struct dm_io *io;
 	sector_t sector;
-	sector_t sector_count;
+	unsigned sector_count;
 };
 
-static void bio_setup_sector(struct bio *bio, sector_t sector, sector_t len)
+static void bio_setup_sector(struct bio *bio, sector_t sector, unsigned len)
 {
 	bio->bi_iter.bi_sector = sector;
 	bio->bi_iter.bi_size = to_bytes(len);
@@ -1195,7 +1260,6 @@ static struct dm_target_io *alloc_tio(struct clone_info *ci,
 
 	tio->io = ci->io;
 	tio->ti = ti;
-	memset(&tio->info, 0, sizeof(tio->info));
 	tio->target_bio_nr = target_bio_nr;
 
 	return tio;
@@ -1203,11 +1267,13 @@ static struct dm_target_io *alloc_tio(struct clone_info *ci,
 
 static void __clone_and_map_simple_bio(struct clone_info *ci,
 				       struct dm_target *ti,
-				       unsigned target_bio_nr, sector_t len)
+				       unsigned target_bio_nr, unsigned *len)
 {
 	struct dm_target_io *tio = alloc_tio(ci, ti, ci->bio->bi_max_vecs, target_bio_nr);
 	struct bio *clone = &tio->clone;
 
+	tio->len_ptr = len;
+
 	/*
 	 * Discard requests require the bio's inline iovecs be initialized.
 	 * ci->bio->bi_max_vecs is BIO_INLINE_VECS anyway, for both flush
@@ -1215,13 +1281,13 @@ static void __clone_and_map_simple_bio(struct clone_info *ci,
 	 */
 	__bio_clone_fast(clone, ci->bio);
 	if (len)
-		bio_setup_sector(clone, ci->sector, len);
+		bio_setup_sector(clone, ci->sector, *len);
 
 	__map_bio(tio);
 }
 
 static void __send_duplicate_bios(struct clone_info *ci, struct dm_target *ti,
-				  unsigned num_bios, sector_t len)
+				  unsigned num_bios, unsigned *len)
 {
 	unsigned target_bio_nr;
 
@@ -1236,13 +1302,13 @@ static int __send_empty_flush(struct clone_info *ci)
 
 	BUG_ON(bio_has_data(ci->bio));
 	while ((ti = dm_table_get_target(ci->map, target_nr++)))
-		__send_duplicate_bios(ci, ti, ti->num_flush_bios, 0);
+		__send_duplicate_bios(ci, ti, ti->num_flush_bios, NULL);
 
 	return 0;
 }
 
 static void __clone_and_map_data_bio(struct clone_info *ci, struct dm_target *ti,
-				     sector_t sector, unsigned len)
+				     sector_t sector, unsigned *len)
 {
 	struct bio *bio = ci->bio;
 	struct dm_target_io *tio;
@@ -1257,7 +1323,8 @@ static void __clone_and_map_data_bio(struct clone_info *ci, struct dm_target *ti
 
 	for (target_bio_nr = 0; target_bio_nr < num_target_bios; target_bio_nr++) {
 		tio = alloc_tio(ci, ti, 0, target_bio_nr);
-		clone_bio(tio, bio, sector, len);
+		tio->len_ptr = len;
+		clone_bio(tio, bio, sector, *len);
 		__map_bio(tio);
 	}
 }
@@ -1286,7 +1353,7 @@ static int __send_changing_extent_only(struct clone_info *ci,
 				       is_split_required_fn is_split_required)
 {
 	struct dm_target *ti;
-	sector_t len;
+	unsigned len;
 	unsigned num_bios;
 
 	do {
@@ -1305,11 +1372,11 @@ static int __send_changing_extent_only(struct clone_info *ci,
 			return -EOPNOTSUPP;
 
 		if (is_split_required && !is_split_required(ti))
-			len = min(ci->sector_count, max_io_len_target_boundary(ci->sector, ti));
+			len = min((sector_t)ci->sector_count, max_io_len_target_boundary(ci->sector, ti));
 		else
-			len = min(ci->sector_count, max_io_len(ci->sector, ti));
+			len = min((sector_t)ci->sector_count, max_io_len(ci->sector, ti));
 
-		__send_duplicate_bios(ci, ti, num_bios, len);
+		__send_duplicate_bios(ci, ti, num_bios, &len);
 
 		ci->sector += len;
 	} while (ci->sector_count -= len);
@@ -1348,7 +1415,7 @@ static int __split_and_process_non_flush(struct clone_info *ci)
 
 	len = min_t(sector_t, max_io_len(ci->sector, ti), ci->sector_count);
 
-	__clone_and_map_data_bio(ci, ti, ci->sector, len);
+	__clone_and_map_data_bio(ci, ti, ci->sector, &len);
 
 	ci->sector += len;
 	ci->sector_count -= len;
@@ -1442,7 +1509,6 @@ static int dm_merge_bvec(struct request_queue *q,
 	 * just one page.
 	 */
 	else if (queue_max_hw_sectors(q) <= PAGE_SIZE >> 9)
-
 		max_size = 0;
 
 out:
@@ -1530,7 +1596,6 @@ static int dm_rq_bio_constructor(struct bio *bio, struct bio *bio_orig,
 	info->orig = bio_orig;
 	info->tio = tio;
 	bio->bi_end_io = end_clone_bio;
-	bio->bi_private = info;
 
 	return 0;
 }
@@ -1548,7 +1613,6 @@ static int setup_clone(struct request *clone, struct request *rq,
 	clone->cmd = rq->cmd;
 	clone->cmd_len = rq->cmd_len;
 	clone->sense = rq->sense;
-	clone->buffer = rq->buffer;
 	clone->end_io = end_clone_request;
 	clone->end_io_data = tio;
 
@@ -1908,6 +1972,7 @@ static struct mapped_device *alloc_dev(int minor)
 	init_waitqueue_head(&md->wait);
 	INIT_WORK(&md->work, dm_wq_work);
 	init_waitqueue_head(&md->eventq);
+	init_completion(&md->kobj_holder.completion);
 
 	md->disk->major = _major;
 	md->disk->first_minor = minor;
@@ -2171,7 +2236,7 @@ static struct dm_table *__unbind(struct mapped_device *md)
 		return NULL;
 
 	dm_table_event_callback(map, NULL, NULL);
-	rcu_assign_pointer(md->map, NULL);
+	RCU_INIT_POINTER(md->map, NULL);
 	dm_sync_table(md);
 
 	return map;
@@ -2450,7 +2515,7 @@ static void dm_wq_work(struct work_struct *work)
 static void dm_queue_flush(struct mapped_device *md)
 {
 	clear_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags);
-	smp_mb__after_clear_bit();
+	smp_mb__after_atomic();
 	queue_work(md->wq, &md->work);
 }
 
@@ -2769,20 +2834,14 @@ struct gendisk *dm_disk(struct mapped_device *md)
 
 struct kobject *dm_kobject(struct mapped_device *md)
 {
-	return &md->kobj;
+	return &md->kobj_holder.kobj;
 }
 
-/*
- * struct mapped_device should not be exported outside of dm.c
- * so use this check to verify that kobj is part of md structure
- */
 struct mapped_device *dm_get_from_kobject(struct kobject *kobj)
 {
 	struct mapped_device *md;
 
-	md = container_of(kobj, struct mapped_device, kobj);
-	if (&md->kobj != kobj)
-		return NULL;
+	md = container_of(kobj, struct mapped_device, kobj_holder.kobj);
 
 	if (test_bit(DMF_FREEING, &md->flags) ||
 	    dm_deleting_md(md))
@@ -2878,8 +2937,6 @@ static const struct block_device_operations dm_blk_dops = {
 	.owner = THIS_MODULE
 };
 
-EXPORT_SYMBOL(dm_get_mapinfo);
-
 /*
  * module hooks
 */
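The dm_accept_partial_bio() helper added above may only be called from a target's map routine, and never for REQ_FLUSH bios. Following the arithmetic in its comment block: if *tio->len_ptr is 1024 sectors, nothing has been advanced (region 1 is empty, so bi_size is also 1024 sectors) and the target accepts n_sectors = 128, then *tio->len_ptr becomes 1024 - (1024 - 128) = 128 and the remaining 896 sectors are sent in the next bio. Below is a minimal sketch of how a hypothetical bio-based target might use the helper; the example_ctx structure, its max_sectors field, and the target itself are invented for illustration and are not part of this patch.

#include <linux/device-mapper.h>
#include <linux/bio.h>

/* Hypothetical per-target context; not part of dm.c. */
struct example_ctx {
	struct dm_dev *dev;
	unsigned max_sectors;	/* largest chunk this target handles per map call */
};

static int example_map(struct dm_target *ti, struct bio *bio)
{
	struct example_ctx *ec = ti->private;

	/*
	 * If the bio is larger than this target wants to handle in one go,
	 * accept only the first max_sectors sectors; dm core sends the rest
	 * (region 3 in the diagram above) in a subsequent bio.  The helper
	 * must not be used for REQ_FLUSH bios.
	 */
	if (!(bio->bi_rw & REQ_FLUSH) && bio_sectors(bio) > ec->max_sectors)
		dm_accept_partial_bio(bio, ec->max_sectors);

	bio->bi_bdev = ec->dev->bdev;	/* remap to the underlying device */
	return DM_MAPIO_REMAPPED;
}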

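Several hunks above delete bi_private assignments because the enclosing structure is now recovered with container_of(): the clone bio is embedded as a member of struct dm_target_io (and of struct dm_rq_clone_bio_info), so its address alone is enough to locate the owner. A small userspace sketch of the same recovery pattern, using made-up structure names rather than the kernel's, compiles and runs on its own:

#include <stddef.h>
#include <stdio.h>

/* Minimal userspace container_of(); the kernel's macro is equivalent. */
#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct fake_bio {
	unsigned int bi_size;
};

struct fake_target_io {
	int target_bio_nr;
	struct fake_bio clone;	/* embedded, like dm_target_io::clone */
};

int main(void)
{
	struct fake_target_io tio = { .target_bio_nr = 3 };
	struct fake_bio *bio = &tio.clone;

	/* Recover the enclosing structure from the embedded member alone. */
	struct fake_target_io *owner =
		container_of(bio, struct fake_target_io, clone);

	printf("target_bio_nr = %d\n", owner->target_bio_nr);	/* prints 3 */
	return 0;
}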