about | summary | refs | log | tree | commit | diff
path: root/drivers/md/md.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md/md.c')
-rw-r--r-- drivers/md/md.c 161
1 files changed, 46 insertions, 115 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c
index c148b630215..225815197a3 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -36,7 +36,7 @@
#include <linux/blkdev.h>
#include <linux/sysctl.h>
#include <linux/seq_file.h>
-#include <linux/smp_lock.h>
+#include <linux/mutex.h>
#include <linux/buffer_head.h> /* for invalidate_bdev */
#include <linux/poll.h>
#include <linux/ctype.h>
@@ -57,6 +57,7 @@
#define DEBUG 0
#define dprintk(x...) ((void)(DEBUG && printk(x)))
+static DEFINE_MUTEX(md_mutex);
#ifndef MODULE
static void autostart_arrays(int part);
@@ -226,12 +227,12 @@ static int md_make_request(struct request_queue *q, struct bio *bio)
return 0;
}
rcu_read_lock();
- if (mddev->suspended || mddev->barrier) {
+ if (mddev->suspended) {
DEFINE_WAIT(__wait);
for (;;) {
prepare_to_wait(&mddev->sb_wait, &__wait,
TASK_UNINTERRUPTIBLE);
- if (!mddev->suspended && !mddev->barrier)
+ if (!mddev->suspended)
break;
rcu_read_unlock();
schedule();
@@ -282,40 +283,29 @@ EXPORT_SYMBOL_GPL(mddev_resume);
int mddev_congested(mddev_t *mddev, int bits)
{
- if (mddev->barrier)
- return 1;
return mddev->suspended;
}
EXPORT_SYMBOL(mddev_congested);
/*
- * Generic barrier handling for md
+ * Generic flush handling for md
*/
-#define POST_REQUEST_BARRIER ((void*)1)
-
-static void md_end_barrier(struct bio *bio, int err)
+static void md_end_flush(struct bio *bio, int err)
{
mdk_rdev_t *rdev = bio->bi_private;
mddev_t *mddev = rdev->mddev;
- if (err == -EOPNOTSUPP && mddev->barrier != POST_REQUEST_BARRIER)
- set_bit(BIO_EOPNOTSUPP, &mddev->barrier->bi_flags);
rdev_dec_pending(rdev, mddev);
if (atomic_dec_and_test(&mddev->flush_pending)) {
- if (mddev->barrier == POST_REQUEST_BARRIER) {
- /* This was a post-request barrier */
- mddev->barrier = NULL;
- wake_up(&mddev->sb_wait);
- } else
- /* The pre-request barrier has finished */
- schedule_work(&mddev->barrier_work);
+ /* The pre-request flush has finished */
+ schedule_work(&mddev->flush_work);
}
bio_put(bio);
}
-static void submit_barriers(mddev_t *mddev)
+static void submit_flushes(mddev_t *mddev)
{
mdk_rdev_t *rdev;
@@ -332,60 +322,56 @@ static void submit_barriers(mddev_t *mddev)
atomic_inc(&rdev->nr_pending);
rcu_read_unlock();
bi = bio_alloc(GFP_KERNEL, 0);
- bi->bi_end_io = md_end_barrier;
+ bi->bi_end_io = md_end_flush;
bi->bi_private = rdev;
bi->bi_bdev = rdev->bdev;
atomic_inc(&mddev->flush_pending);
- submit_bio(WRITE_BARRIER, bi);
+ submit_bio(WRITE_FLUSH, bi);
rcu_read_lock();
rdev_dec_pending(rdev, mddev);
}
rcu_read_unlock();
}
-static void md_submit_barrier(struct work_struct *ws)
+static void md_submit_flush_data(struct work_struct *ws)
{
- mddev_t *mddev = container_of(ws, mddev_t, barrier_work);
- struct bio *bio = mddev->barrier;
+ mddev_t *mddev = container_of(ws, mddev_t, flush_work);
+ struct bio *bio = mddev->flush_bio;
atomic_set(&mddev->flush_pending, 1);
- if (test_bit(BIO_EOPNOTSUPP, &bio->bi_flags))
- bio_endio(bio, -EOPNOTSUPP);
- else if (bio->bi_size == 0)
+ if (bio->bi_size == 0)
/* an empty barrier - all done */
bio_endio(bio, 0);
else {
- bio->bi_rw &= ~REQ_HARDBARRIER;
+ bio->bi_rw &= ~REQ_FLUSH;
if (mddev->pers->make_request(mddev, bio))
generic_make_request(bio);
- mddev->barrier = POST_REQUEST_BARRIER;
- submit_barriers(mddev);
}
if (atomic_dec_and_test(&mddev->flush_pending)) {
- mddev->barrier = NULL;
+ mddev->flush_bio = NULL;
wake_up(&mddev->sb_wait);
}
}
-void md_barrier_request(mddev_t *mddev, struct bio *bio)
+void md_flush_request(mddev_t *mddev, struct bio *bio)
{
spin_lock_irq(&mddev->write_lock);
wait_event_lock_irq(mddev->sb_wait,
- !mddev->barrier,
+ !mddev->flush_bio,
mddev->write_lock, /*nothing*/);
- mddev->barrier = bio;
+ mddev->flush_bio = bio;
spin_unlock_irq(&mddev->write_lock);
atomic_set(&mddev->flush_pending, 1);
- INIT_WORK(&mddev->barrier_work, md_submit_barrier);
+ INIT_WORK(&mddev->flush_work, md_submit_flush_data);
- submit_barriers(mddev);
+ submit_flushes(mddev);
if (atomic_dec_and_test(&mddev->flush_pending))
- schedule_work(&mddev->barrier_work);
+ schedule_work(&mddev->flush_work);
}
-EXPORT_SYMBOL(md_barrier_request);
+EXPORT_SYMBOL(md_flush_request);
/* Support for plugging.
* This mirrors the plugging support in request_queue, but does not
@@ -696,31 +682,6 @@ static void super_written(struct bio *bio, int error)
bio_put(bio);
}
-static void super_written_barrier(struct bio *bio, int error)
-{
- struct bio *bio2 = bio->bi_private;
- mdk_rdev_t *rdev = bio2->bi_private;
- mddev_t *mddev = rdev->mddev;
-
- if (!test_bit(BIO_UPTODATE, &bio->bi_flags) &&
- error == -EOPNOTSUPP) {
- unsigned long flags;
- /* barriers don't appear to be supported :-( */
- set_bit(BarriersNotsupp, &rdev->flags);
- mddev->barriers_work = 0;
- spin_lock_irqsave(&mddev->write_lock, flags);
- bio2->bi_next = mddev->biolist;
- mddev->biolist = bio2;
- spin_unlock_irqrestore(&mddev->write_lock, flags);
- wake_up(&mddev->sb_wait);
- bio_put(bio);
- } else {
- bio_put(bio2);
- bio->bi_private = rdev;
- super_written(bio, error);
- }
-}
-
void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev,
sector_t sector, int size, struct page *page)
{
@@ -729,51 +690,28 @@ void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev,
* and decrement it on completion, waking up sb_wait
* if zero is reached.
* If an error occurred, call md_error
- *
- * As we might need to resubmit the request if REQ_HARDBARRIER
- * causes ENOTSUPP, we allocate a spare bio...
*/
struct bio *bio = bio_alloc(GFP_NOIO, 1);
- int rw = REQ_WRITE | REQ_SYNC | REQ_UNPLUG;
bio->bi_bdev = rdev->bdev;
bio->bi_sector = sector;
bio_add_page(bio, page, size, 0);
bio->bi_private = rdev;
bio->bi_end_io = super_written;
- bio->bi_rw = rw;
atomic_inc(&mddev->pending_writes);
- if (!test_bit(BarriersNotsupp, &rdev->flags)) {
- struct bio *rbio;
- rw |= REQ_HARDBARRIER;
- rbio = bio_clone(bio, GFP_NOIO);
- rbio->bi_private = bio;
- rbio->bi_end_io = super_written_barrier;
- submit_bio(rw, rbio);
- } else
- submit_bio(rw, bio);
+ submit_bio(REQ_WRITE | REQ_SYNC | REQ_UNPLUG | REQ_FLUSH | REQ_FUA,
+ bio);
}
void md_super_wait(mddev_t *mddev)
{
- /* wait for all superblock writes that were scheduled to complete.
- * if any had to be retried (due to BARRIER problems), retry them
- */
+ /* wait for all superblock writes that were scheduled to complete */
DEFINE_WAIT(wq);
for(;;) {
prepare_to_wait(&mddev->sb_wait, &wq, TASK_UNINTERRUPTIBLE);
if (atomic_read(&mddev->pending_writes)==0)
break;
- while (mddev->biolist) {
- struct bio *bio;
- spin_lock_irq(&mddev->write_lock);
- bio = mddev->biolist;
- mddev->biolist = bio->bi_next ;
- bio->bi_next = NULL;
- spin_unlock_irq(&mddev->write_lock);
- submit_bio(bio->bi_rw, bio);
- }
schedule();
}
finish_wait(&mddev->sb_wait, &wq);
@@ -1070,7 +1008,6 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev)
clear_bit(Faulty, &rdev->flags);
clear_bit(In_sync, &rdev->flags);
clear_bit(WriteMostly, &rdev->flags);
- clear_bit(BarriersNotsupp, &rdev->flags);
if (mddev->raid_disks == 0) {
mddev->major_version = 0;
@@ -1485,7 +1422,6 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev)
clear_bit(Faulty, &rdev->flags);
clear_bit(In_sync, &rdev->flags);
clear_bit(WriteMostly, &rdev->flags);
- clear_bit(BarriersNotsupp, &rdev->flags);
if (mddev->raid_disks == 0) {
mddev->major_version = 1;
@@ -1643,7 +1579,9 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev)
bmask = queue_logical_block_size(rdev->bdev->bd_disk->queue)-1;
if (rdev->sb_size & bmask)
rdev->sb_size = (rdev->sb_size | bmask) + 1;
- }
+ } else
+ max_dev = le32_to_cpu(sb->max_dev);
+
for (i=0; i<max_dev;i++)
sb->dev_roles[i] = cpu_to_le16(0xfffe);
@@ -2167,9 +2105,9 @@ repeat:
rdev->recovery_offset = mddev->curr_resync_completed;
}
- if (mddev->external || !mddev->persistent) {
- clear_bit(MD_CHANGE_DEVS, &mddev->flags);
+ if (!mddev->persistent) {
clear_bit(MD_CHANGE_CLEAN, &mddev->flags);
+ clear_bit(MD_CHANGE_DEVS, &mddev->flags);
wake_up(&mddev->sb_wait);
return;
}
@@ -2178,7 +2116,6 @@ repeat:
mddev->utime = get_seconds();
- set_bit(MD_CHANGE_PENDING, &mddev->flags);
if (test_and_clear_bit(MD_CHANGE_DEVS, &mddev->flags))
force_change = 1;
if (test_and_clear_bit(MD_CHANGE_CLEAN, &mddev->flags))
@@ -3371,7 +3308,7 @@ array_state_show(mddev_t *mddev, char *page)
case 0:
if (mddev->in_sync)
st = clean;
- else if (test_bit(MD_CHANGE_CLEAN, &mddev->flags))
+ else if (test_bit(MD_CHANGE_PENDING, &mddev->flags))
st = write_pending;
else if (mddev->safemode)
st = active_idle;
@@ -3452,9 +3389,7 @@ array_state_store(mddev_t *mddev, const char *buf, size_t len)
mddev->in_sync = 1;
if (mddev->safemode == 1)
mddev->safemode = 0;
- if (mddev->persistent)
- set_bit(MD_CHANGE_CLEAN,
- &mddev->flags);
+ set_bit(MD_CHANGE_CLEAN, &mddev->flags);
}
err = 0;
} else
@@ -3466,8 +3401,7 @@ array_state_store(mddev_t *mddev, const char *buf, size_t len)
case active:
if (mddev->pers) {
restart_array(mddev);
- if (mddev->external)
- clear_bit(MD_CHANGE_CLEAN, &mddev->flags);
+ clear_bit(MD_CHANGE_PENDING, &mddev->flags);
wake_up(&mddev->sb_wait);
err = 0;
} else {
@@ -4506,7 +4440,6 @@ int md_run(mddev_t *mddev)
/* may be over-ridden by personality */
mddev->resync_max_sectors = mddev->dev_sectors;
- mddev->barriers_work = 1;
mddev->ok_start_degraded = start_dirty_degraded;
if (start_readonly && mddev->ro == 0)
@@ -4685,7 +4618,6 @@ static void md_clean(mddev_t *mddev)
mddev->recovery = 0;
mddev->in_sync = 0;
mddev->degraded = 0;
- mddev->barriers_work = 0;
mddev->safemode = 0;
mddev->bitmap_info.offset = 0;
mddev->bitmap_info.default_offset = 0;
@@ -5953,7 +5885,7 @@ static int md_open(struct block_device *bdev, fmode_t mode)
mddev_t *mddev = mddev_find(bdev->bd_dev);
int err;
- lock_kernel();
+ mutex_lock(&md_mutex);
if (mddev->gendisk != bdev->bd_disk) {
/* we are racing with mddev_put which is discarding this
* bd_disk.
@@ -5962,7 +5894,7 @@ static int md_open(struct block_device *bdev, fmode_t mode)
/* Wait until bdev->bd_disk is definitely gone */
flush_scheduled_work();
/* Then retry the open from the top */
- unlock_kernel();
+ mutex_unlock(&md_mutex);
return -ERESTARTSYS;
}
BUG_ON(mddev != bdev->bd_disk->private_data);
@@ -5976,7 +5908,7 @@ static int md_open(struct block_device *bdev, fmode_t mode)
check_disk_size_change(mddev->gendisk, bdev);
out:
- unlock_kernel();
+ mutex_unlock(&md_mutex);
return err;
}
@@ -5985,10 +5917,10 @@ static int md_release(struct gendisk *disk, fmode_t mode)
mddev_t *mddev = disk->private_data;
BUG_ON(!mddev);
- lock_kernel();
+ mutex_lock(&md_mutex);
atomic_dec(&mddev->openers);
mddev_put(mddev);
- unlock_kernel();
+ mutex_unlock(&md_mutex);
return 0;
}
@@ -6572,6 +6504,7 @@ void md_write_start(mddev_t *mddev, struct bio *bi)
if (mddev->in_sync) {
mddev->in_sync = 0;
set_bit(MD_CHANGE_CLEAN, &mddev->flags);
+ set_bit(MD_CHANGE_PENDING, &mddev->flags);
md_wakeup_thread(mddev->thread);
did_change = 1;
}
@@ -6580,7 +6513,6 @@ void md_write_start(mddev_t *mddev, struct bio *bi)
if (did_change)
sysfs_notify_dirent_safe(mddev->sysfs_state);
wait_event(mddev->sb_wait,
- !test_bit(MD_CHANGE_CLEAN, &mddev->flags) &&
!test_bit(MD_CHANGE_PENDING, &mddev->flags));
}
@@ -6616,6 +6548,7 @@ int md_allow_write(mddev_t *mddev)
if (mddev->in_sync) {
mddev->in_sync = 0;
set_bit(MD_CHANGE_CLEAN, &mddev->flags);
+ set_bit(MD_CHANGE_PENDING, &mddev->flags);
if (mddev->safemode_delay &&
mddev->safemode == 0)
mddev->safemode = 1;
@@ -6625,7 +6558,7 @@ int md_allow_write(mddev_t *mddev)
} else
spin_unlock_irq(&mddev->write_lock);
- if (test_bit(MD_CHANGE_CLEAN, &mddev->flags))
+ if (test_bit(MD_CHANGE_PENDING, &mddev->flags))
return -EAGAIN;
else
return 0;
@@ -6823,8 +6756,7 @@ void md_do_sync(mddev_t *mddev)
atomic_read(&mddev->recovery_active) == 0);
mddev->curr_resync_completed =
mddev->curr_resync;
- if (mddev->persistent)
- set_bit(MD_CHANGE_CLEAN, &mddev->flags);
+ set_bit(MD_CHANGE_CLEAN, &mddev->flags);
sysfs_notify(&mddev->kobj, NULL, "sync_completed");
}
@@ -7073,7 +7005,7 @@ void md_check_recovery(mddev_t *mddev)
if (mddev->ro && !test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
return;
if ( ! (
- (mddev->flags && !mddev->external) ||
+ (mddev->flags & ~ (1<<MD_CHANGE_PENDING)) ||
test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
test_bit(MD_RECOVERY_DONE, &mddev->recovery) ||
(mddev->external == 0 && mddev->safemode == 1) ||
@@ -7103,8 +7035,7 @@ void md_check_recovery(mddev_t *mddev)
mddev->recovery_cp == MaxSector) {
mddev->in_sync = 1;
did_change = 1;
- if (mddev->persistent)
- set_bit(MD_CHANGE_CLEAN, &mddev->flags);
+ set_bit(MD_CHANGE_CLEAN, &mddev->flags);
}
if (mddev->safemode == 1)
mddev->safemode = 0;