diff options
Diffstat (limited to 'drivers/md/md.c')
-rw-r--r-- | drivers/md/md.c | 111 |
1 files changed, 109 insertions, 2 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c index 08f7471d015..1297c9db0cf 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -217,12 +217,12 @@ static int md_make_request(struct request_queue *q, struct bio *bio) return 0; } rcu_read_lock(); - if (mddev->suspended) { + if (mddev->suspended || mddev->barrier) { DEFINE_WAIT(__wait); for (;;) { prepare_to_wait(&mddev->sb_wait, &__wait, TASK_UNINTERRUPTIBLE); - if (!mddev->suspended) + if (!mddev->suspended && !mddev->barrier) break; rcu_read_unlock(); schedule(); @@ -264,11 +264,117 @@ static void mddev_resume(mddev_t *mddev) int mddev_congested(mddev_t *mddev, int bits) { + if (mddev->barrier) + return 1; return mddev->suspended; } EXPORT_SYMBOL(mddev_congested); +/* + * Generic barrier handling for md + */ + +static void md_end_barrier(struct bio *bio, int err) +{ + mdk_rdev_t *rdev = bio->bi_private; + mddev_t *mddev = rdev->mddev; + if (err == -EOPNOTSUPP && mddev->barrier != (void*)1) + set_bit(BIO_EOPNOTSUPP, &mddev->barrier->bi_flags); + + rdev_dec_pending(rdev, mddev); + + if (atomic_dec_and_test(&mddev->flush_pending)) { + if (mddev->barrier == (void*)1) { + mddev->barrier = NULL; + wake_up(&mddev->sb_wait); + } else + schedule_work(&mddev->barrier_work); + } + bio_put(bio); +} + +static void md_submit_barrier(struct work_struct *ws) +{ + mddev_t *mddev = container_of(ws, mddev_t, barrier_work); + struct bio *bio = mddev->barrier; + + atomic_set(&mddev->flush_pending, 1); + if (!test_bit(BIO_EOPNOTSUPP, &bio->bi_flags)) { + mdk_rdev_t *rdev; + + bio->bi_rw &= ~(1<<BIO_RW_BARRIER); + if (mddev->pers->make_request(mddev->queue, bio)) + generic_make_request(bio); + mddev->barrier = (void*)1; + rcu_read_lock(); + list_for_each_entry_rcu(rdev, &mddev->disks, same_set) + if (rdev->raid_disk >= 0 && + !test_bit(Faulty, &rdev->flags)) { + /* Take two references, one is dropped + * when request finishes, one after + * we reclaim rcu_read_lock + */ + struct bio *bi; + atomic_inc(&rdev->nr_pending); + atomic_inc(&rdev->nr_pending); + rcu_read_unlock(); + bi = bio_alloc(GFP_KERNEL, 0); + bi->bi_end_io = md_end_barrier; + bi->bi_private = rdev; + bi->bi_bdev = rdev->bdev; + atomic_inc(&mddev->flush_pending); + submit_bio(WRITE_BARRIER, bi); + rcu_read_lock(); + rdev_dec_pending(rdev, mddev); + } + rcu_read_unlock(); + } else + bio_endio(bio, -EOPNOTSUPP); + if (atomic_dec_and_test(&mddev->flush_pending)) { + mddev->barrier = NULL; + wake_up(&mddev->sb_wait); + } +} + +void md_barrier_request(mddev_t *mddev, struct bio *bio) +{ + mdk_rdev_t *rdev; + + spin_lock_irq(&mddev->write_lock); + wait_event_lock_irq(mddev->sb_wait, + !mddev->barrier, + mddev->write_lock, /*nothing*/); + mddev->barrier = bio; + spin_unlock_irq(&mddev->write_lock); + + atomic_set(&mddev->flush_pending, 1); + INIT_WORK(&mddev->barrier_work, md_submit_barrier); + + + rcu_read_lock(); + list_for_each_entry_rcu(rdev, &mddev->disks, same_set) + if (rdev->raid_disk >= 0 && + !test_bit(Faulty, &rdev->flags)) { + struct bio *bi; + + atomic_inc(&rdev->nr_pending); + atomic_inc(&rdev->nr_pending); + rcu_read_unlock(); + bi = bio_alloc(GFP_KERNEL, 0); + bi->bi_end_io = md_end_barrier; + bi->bi_private = rdev; + bi->bi_bdev = rdev->bdev; + atomic_inc(&mddev->flush_pending); + submit_bio(WRITE_BARRIER, bi); + rcu_read_lock(); + rdev_dec_pending(rdev, mddev); + } + rcu_read_unlock(); + if (atomic_dec_and_test(&mddev->flush_pending)) + schedule_work(&mddev->barrier_work); +} +EXPORT_SYMBOL(md_barrier_request); static inline mddev_t *mddev_get(mddev_t *mddev) { atomic_inc(&mddev->active); @@ -377,6 +483,7 @@ static mddev_t * mddev_find(dev_t unit) atomic_set(&new->openers, 0); atomic_set(&new->active_io, 0); spin_lock_init(&new->write_lock); + atomic_set(&new->flush_pending, 0); init_waitqueue_head(&new->sb_wait); init_waitqueue_head(&new->recovery_wait); new->reshape_position = MaxSector; |